[AArch64] Logical vector shift right conformance
[official-gcc.git] / gcc / config / aarch64 / arm_neon.h
blob747a292ba9b2260e74566c946fe57afaea267969
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
33 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
35 typedef __builtin_aarch64_simd_qi int8x8_t
36 __attribute__ ((__vector_size__ (8)));
37 typedef __builtin_aarch64_simd_hi int16x4_t
38 __attribute__ ((__vector_size__ (8)));
39 typedef __builtin_aarch64_simd_si int32x2_t
40 __attribute__ ((__vector_size__ (8)));
41 typedef int64_t int64x1_t;
42 typedef int32_t int32x1_t;
43 typedef int16_t int16x1_t;
44 typedef int8_t int8x1_t;
45 typedef double float64x1_t;
46 typedef __builtin_aarch64_simd_sf float32x2_t
47 __attribute__ ((__vector_size__ (8)));
48 typedef __builtin_aarch64_simd_poly8 poly8x8_t
49 __attribute__ ((__vector_size__ (8)));
50 typedef __builtin_aarch64_simd_poly16 poly16x4_t
51 __attribute__ ((__vector_size__ (8)));
52 typedef __builtin_aarch64_simd_uqi uint8x8_t
53 __attribute__ ((__vector_size__ (8)));
54 typedef __builtin_aarch64_simd_uhi uint16x4_t
55 __attribute__ ((__vector_size__ (8)));
56 typedef __builtin_aarch64_simd_usi uint32x2_t
57 __attribute__ ((__vector_size__ (8)));
58 typedef uint64_t uint64x1_t;
59 typedef uint32_t uint32x1_t;
60 typedef uint16_t uint16x1_t;
61 typedef uint8_t uint8x1_t;
62 typedef __builtin_aarch64_simd_qi int8x16_t
63 __attribute__ ((__vector_size__ (16)));
64 typedef __builtin_aarch64_simd_hi int16x8_t
65 __attribute__ ((__vector_size__ (16)));
66 typedef __builtin_aarch64_simd_si int32x4_t
67 __attribute__ ((__vector_size__ (16)));
68 typedef __builtin_aarch64_simd_di int64x2_t
69 __attribute__ ((__vector_size__ (16)));
70 typedef __builtin_aarch64_simd_sf float32x4_t
71 __attribute__ ((__vector_size__ (16)));
72 typedef __builtin_aarch64_simd_df float64x2_t
73 __attribute__ ((__vector_size__ (16)));
74 typedef __builtin_aarch64_simd_poly8 poly8x16_t
75 __attribute__ ((__vector_size__ (16)));
76 typedef __builtin_aarch64_simd_poly16 poly16x8_t
77 __attribute__ ((__vector_size__ (16)));
78 typedef __builtin_aarch64_simd_poly64 poly64x2_t
79 __attribute__ ((__vector_size__ (16)));
80 typedef __builtin_aarch64_simd_uqi uint8x16_t
81 __attribute__ ((__vector_size__ (16)));
82 typedef __builtin_aarch64_simd_uhi uint16x8_t
83 __attribute__ ((__vector_size__ (16)));
84 typedef __builtin_aarch64_simd_usi uint32x4_t
85 __attribute__ ((__vector_size__ (16)));
86 typedef __builtin_aarch64_simd_udi uint64x2_t
87 __attribute__ ((__vector_size__ (16)));
89 typedef float float32_t;
90 typedef double float64_t;
91 typedef __builtin_aarch64_simd_poly8 poly8_t;
92 typedef __builtin_aarch64_simd_poly16 poly16_t;
93 typedef __builtin_aarch64_simd_poly64 poly64_t;
94 typedef __builtin_aarch64_simd_poly128 poly128_t;
96 typedef struct int8x8x2_t
98 int8x8_t val[2];
99 } int8x8x2_t;
101 typedef struct int8x16x2_t
103 int8x16_t val[2];
104 } int8x16x2_t;
106 typedef struct int16x4x2_t
108 int16x4_t val[2];
109 } int16x4x2_t;
111 typedef struct int16x8x2_t
113 int16x8_t val[2];
114 } int16x8x2_t;
116 typedef struct int32x2x2_t
118 int32x2_t val[2];
119 } int32x2x2_t;
121 typedef struct int32x4x2_t
123 int32x4_t val[2];
124 } int32x4x2_t;
126 typedef struct int64x1x2_t
128 int64x1_t val[2];
129 } int64x1x2_t;
131 typedef struct int64x2x2_t
133 int64x2_t val[2];
134 } int64x2x2_t;
136 typedef struct uint8x8x2_t
138 uint8x8_t val[2];
139 } uint8x8x2_t;
141 typedef struct uint8x16x2_t
143 uint8x16_t val[2];
144 } uint8x16x2_t;
146 typedef struct uint16x4x2_t
148 uint16x4_t val[2];
149 } uint16x4x2_t;
151 typedef struct uint16x8x2_t
153 uint16x8_t val[2];
154 } uint16x8x2_t;
156 typedef struct uint32x2x2_t
158 uint32x2_t val[2];
159 } uint32x2x2_t;
161 typedef struct uint32x4x2_t
163 uint32x4_t val[2];
164 } uint32x4x2_t;
166 typedef struct uint64x1x2_t
168 uint64x1_t val[2];
169 } uint64x1x2_t;
171 typedef struct uint64x2x2_t
173 uint64x2_t val[2];
174 } uint64x2x2_t;
176 typedef struct float32x2x2_t
178 float32x2_t val[2];
179 } float32x2x2_t;
181 typedef struct float32x4x2_t
183 float32x4_t val[2];
184 } float32x4x2_t;
186 typedef struct float64x2x2_t
188 float64x2_t val[2];
189 } float64x2x2_t;
191 typedef struct float64x1x2_t
193 float64x1_t val[2];
194 } float64x1x2_t;
196 typedef struct poly8x8x2_t
198 poly8x8_t val[2];
199 } poly8x8x2_t;
201 typedef struct poly8x16x2_t
203 poly8x16_t val[2];
204 } poly8x16x2_t;
206 typedef struct poly16x4x2_t
208 poly16x4_t val[2];
209 } poly16x4x2_t;
211 typedef struct poly16x8x2_t
213 poly16x8_t val[2];
214 } poly16x8x2_t;
216 typedef struct int8x8x3_t
218 int8x8_t val[3];
219 } int8x8x3_t;
221 typedef struct int8x16x3_t
223 int8x16_t val[3];
224 } int8x16x3_t;
226 typedef struct int16x4x3_t
228 int16x4_t val[3];
229 } int16x4x3_t;
231 typedef struct int16x8x3_t
233 int16x8_t val[3];
234 } int16x8x3_t;
236 typedef struct int32x2x3_t
238 int32x2_t val[3];
239 } int32x2x3_t;
241 typedef struct int32x4x3_t
243 int32x4_t val[3];
244 } int32x4x3_t;
246 typedef struct int64x1x3_t
248 int64x1_t val[3];
249 } int64x1x3_t;
251 typedef struct int64x2x3_t
253 int64x2_t val[3];
254 } int64x2x3_t;
256 typedef struct uint8x8x3_t
258 uint8x8_t val[3];
259 } uint8x8x3_t;
261 typedef struct uint8x16x3_t
263 uint8x16_t val[3];
264 } uint8x16x3_t;
266 typedef struct uint16x4x3_t
268 uint16x4_t val[3];
269 } uint16x4x3_t;
271 typedef struct uint16x8x3_t
273 uint16x8_t val[3];
274 } uint16x8x3_t;
276 typedef struct uint32x2x3_t
278 uint32x2_t val[3];
279 } uint32x2x3_t;
281 typedef struct uint32x4x3_t
283 uint32x4_t val[3];
284 } uint32x4x3_t;
286 typedef struct uint64x1x3_t
288 uint64x1_t val[3];
289 } uint64x1x3_t;
291 typedef struct uint64x2x3_t
293 uint64x2_t val[3];
294 } uint64x2x3_t;
296 typedef struct float32x2x3_t
298 float32x2_t val[3];
299 } float32x2x3_t;
301 typedef struct float32x4x3_t
303 float32x4_t val[3];
304 } float32x4x3_t;
306 typedef struct float64x2x3_t
308 float64x2_t val[3];
309 } float64x2x3_t;
311 typedef struct float64x1x3_t
313 float64x1_t val[3];
314 } float64x1x3_t;
316 typedef struct poly8x8x3_t
318 poly8x8_t val[3];
319 } poly8x8x3_t;
321 typedef struct poly8x16x3_t
323 poly8x16_t val[3];
324 } poly8x16x3_t;
326 typedef struct poly16x4x3_t
328 poly16x4_t val[3];
329 } poly16x4x3_t;
331 typedef struct poly16x8x3_t
333 poly16x8_t val[3];
334 } poly16x8x3_t;
336 typedef struct int8x8x4_t
338 int8x8_t val[4];
339 } int8x8x4_t;
341 typedef struct int8x16x4_t
343 int8x16_t val[4];
344 } int8x16x4_t;
346 typedef struct int16x4x4_t
348 int16x4_t val[4];
349 } int16x4x4_t;
351 typedef struct int16x8x4_t
353 int16x8_t val[4];
354 } int16x8x4_t;
356 typedef struct int32x2x4_t
358 int32x2_t val[4];
359 } int32x2x4_t;
361 typedef struct int32x4x4_t
363 int32x4_t val[4];
364 } int32x4x4_t;
366 typedef struct int64x1x4_t
368 int64x1_t val[4];
369 } int64x1x4_t;
371 typedef struct int64x2x4_t
373 int64x2_t val[4];
374 } int64x2x4_t;
376 typedef struct uint8x8x4_t
378 uint8x8_t val[4];
379 } uint8x8x4_t;
381 typedef struct uint8x16x4_t
383 uint8x16_t val[4];
384 } uint8x16x4_t;
386 typedef struct uint16x4x4_t
388 uint16x4_t val[4];
389 } uint16x4x4_t;
391 typedef struct uint16x8x4_t
393 uint16x8_t val[4];
394 } uint16x8x4_t;
396 typedef struct uint32x2x4_t
398 uint32x2_t val[4];
399 } uint32x2x4_t;
401 typedef struct uint32x4x4_t
403 uint32x4_t val[4];
404 } uint32x4x4_t;
406 typedef struct uint64x1x4_t
408 uint64x1_t val[4];
409 } uint64x1x4_t;
411 typedef struct uint64x2x4_t
413 uint64x2_t val[4];
414 } uint64x2x4_t;
416 typedef struct float32x2x4_t
418 float32x2_t val[4];
419 } float32x2x4_t;
421 typedef struct float32x4x4_t
423 float32x4_t val[4];
424 } float32x4x4_t;
426 typedef struct float64x2x4_t
428 float64x2_t val[4];
429 } float64x2x4_t;
431 typedef struct float64x1x4_t
433 float64x1_t val[4];
434 } float64x1x4_t;
436 typedef struct poly8x8x4_t
438 poly8x8_t val[4];
439 } poly8x8x4_t;
441 typedef struct poly8x16x4_t
443 poly8x16_t val[4];
444 } poly8x16x4_t;
446 typedef struct poly16x4x4_t
448 poly16x4_t val[4];
449 } poly16x4x4_t;
451 typedef struct poly16x8x4_t
453 poly16x8_t val[4];
454 } poly16x8x4_t;
/* vget_lane internal macros.  __size is the machine-mode suffix of the
   lane-extract builtin; __cast_ret / __cast_a optionally cast the result
   and argument when the user-visible type differs from the builtin's.  */
#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret								\
     __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
/* One-element vectors are plain scalars; lane 0 is the value itself.  */
#define __aarch64_vget_lane_f64(__a, __b) (__a)

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) (__a)

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) (__a)

/* Q-register (128-bit) variants.  */
#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
/* __aarch64_vdup_lane internal macros.  A lane-dup is implemented as a
   lane-extract followed by a vdup*_n broadcast; __q1 selects the width of
   the result (D vs. Q), __q2 the width of the source vector.  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
  vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , , __a, __b)
/* One-element vectors: duplicating lane 0 is the identity.  */
#define __aarch64_vdup_lane_f64(__a, __b) (__a)
#define __aarch64_vdup_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) (__a)
#define __aarch64_vdup_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) (__a)

/* __aarch64_vdup_laneq internal macros: D-register result from a
   Q-register source lane.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , q, __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , q, __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , q, __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , q, __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , q, __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , q, __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , q, __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , q, __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , q, __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , q, __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , q, __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , q, __a, __b)
/* __aarch64_vdupq_lane internal macros: Q-register result from a
   D-register source lane.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, , __a, __b)
/* One-element sources: broadcast the scalar directly.  */
#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a))
#define __aarch64_vdupq_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, , __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, , __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, , __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, , __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, , __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a))
#define __aarch64_vdupq_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, , __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, , __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, , __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a))

/* __aarch64_vdupq_laneq internal macros: Q-register result from a
   Q-register source lane.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, q, __a, __b)
616 /* vadd */
617 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
618 vadd_s8 (int8x8_t __a, int8x8_t __b)
620 return __a + __b;
623 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
624 vadd_s16 (int16x4_t __a, int16x4_t __b)
626 return __a + __b;
629 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
630 vadd_s32 (int32x2_t __a, int32x2_t __b)
632 return __a + __b;
635 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
636 vadd_f32 (float32x2_t __a, float32x2_t __b)
638 return __a + __b;
641 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
642 vadd_f64 (float64x1_t __a, float64x1_t __b)
644 return __a + __b;
647 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
648 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
650 return __a + __b;
653 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
654 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
656 return __a + __b;
659 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
660 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
662 return __a + __b;
665 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
666 vadd_s64 (int64x1_t __a, int64x1_t __b)
668 return __a + __b;
671 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
672 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
674 return __a + __b;
677 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
678 vaddq_s8 (int8x16_t __a, int8x16_t __b)
680 return __a + __b;
683 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
684 vaddq_s16 (int16x8_t __a, int16x8_t __b)
686 return __a + __b;
689 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
690 vaddq_s32 (int32x4_t __a, int32x4_t __b)
692 return __a + __b;
695 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
696 vaddq_s64 (int64x2_t __a, int64x2_t __b)
698 return __a + __b;
701 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
702 vaddq_f32 (float32x4_t __a, float32x4_t __b)
704 return __a + __b;
707 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
708 vaddq_f64 (float64x2_t __a, float64x2_t __b)
710 return __a + __b;
713 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
714 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
716 return __a + __b;
719 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
720 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
722 return __a + __b;
725 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
726 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
728 return __a + __b;
731 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
732 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
734 return __a + __b;
737 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
738 vaddl_s8 (int8x8_t __a, int8x8_t __b)
740 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
743 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
744 vaddl_s16 (int16x4_t __a, int16x4_t __b)
746 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
749 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
750 vaddl_s32 (int32x2_t __a, int32x2_t __b)
752 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
755 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
756 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
758 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
759 (int8x8_t) __b);
762 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
763 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
765 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
766 (int16x4_t) __b);
769 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
770 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
772 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
773 (int32x2_t) __b);
776 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
777 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
779 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
782 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
783 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
785 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
788 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
789 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
791 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
794 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
795 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
797 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
798 (int8x16_t) __b);
801 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
802 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
804 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
805 (int16x8_t) __b);
808 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
809 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
811 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
812 (int32x4_t) __b);
815 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
816 vaddw_s8 (int16x8_t __a, int8x8_t __b)
818 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
821 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
822 vaddw_s16 (int32x4_t __a, int16x4_t __b)
824 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
827 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
828 vaddw_s32 (int64x2_t __a, int32x2_t __b)
830 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
833 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
834 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
836 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
837 (int8x8_t) __b);
840 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
841 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
843 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
844 (int16x4_t) __b);
847 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
848 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
850 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
851 (int32x2_t) __b);
854 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
855 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
857 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
860 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
861 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
863 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
866 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
867 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
869 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
872 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
873 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
875 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
876 (int8x16_t) __b);
879 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
880 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
882 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
883 (int16x8_t) __b);
886 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
887 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
889 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
890 (int32x4_t) __b);
893 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
894 vhadd_s8 (int8x8_t __a, int8x8_t __b)
896 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
899 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
900 vhadd_s16 (int16x4_t __a, int16x4_t __b)
902 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
905 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
906 vhadd_s32 (int32x2_t __a, int32x2_t __b)
908 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
911 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
912 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
914 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
915 (int8x8_t) __b);
918 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
919 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
921 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
922 (int16x4_t) __b);
925 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
926 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
928 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
929 (int32x2_t) __b);
932 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
933 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
935 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
938 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
939 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
941 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
944 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
945 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
947 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
950 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
951 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
953 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
954 (int8x16_t) __b);
957 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
958 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
960 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
961 (int16x8_t) __b);
964 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
965 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
967 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
968 (int32x4_t) __b);
971 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
972 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
974 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
977 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
978 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
980 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
983 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
984 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
986 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
989 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
990 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
992 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
993 (int8x8_t) __b);
996 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
997 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
999 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
1000 (int16x4_t) __b);
1003 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1004 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
1006 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
1007 (int32x2_t) __b);
1010 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1011 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1013 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1016 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1017 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1019 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1022 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1023 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1025 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1028 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1029 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1031 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1032 (int8x16_t) __b);
1035 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1036 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1038 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1039 (int16x8_t) __b);
1042 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1043 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1045 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1046 (int32x4_t) __b);
1049 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1050 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1052 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1055 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1056 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1058 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1061 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1062 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1064 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1067 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1068 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1070 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1071 (int16x8_t) __b);
1074 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1075 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1077 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1078 (int32x4_t) __b);
1081 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1082 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1084 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1085 (int64x2_t) __b);
1088 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1089 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1091 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1094 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1095 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1097 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1100 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1101 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1103 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
/* Rounding narrowing high-half addition (RADDHN) — unsigned variants;
   casts adapt the unsigned types to the signed builtin signature.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}
/* ADDHN2: narrowing high-half addition of __b and __c, with the result
   placed in the upper half of the returned vector and __a in the lower
   half — signed variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
}
/* ADDHN2 — unsigned variants; all operands are cast to the signed
   builtin signature.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}
/* RADDHN2: rounding variant of the narrowing high-half addition into
   the upper vector half — signed variants.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
}
/* RADDHN2 — unsigned variants; casts adapt to the signed builtin
   signature.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
						     (int16x8_t) __b,
						     (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
						     (int32x4_t) __b,
						     (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
						     (int64x2_t) __b,
						     (int64x2_t) __c);
}
/* Element-wise floating-point division, expressed directly with the
   GNU vector-extension '/' operator.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdiv_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdiv_f64 (float64x1_t __a, float64x1_t __b)
{
  /* float64x1_t is a plain double in this header, so this is scalar
     division.  */
  return __a / __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdivq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdivq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a / __b;
}
/* Element-wise multiplication.  Integer and float forms use the GNU
   vector '*' operator; the polynomial forms must use the dedicated
   PMUL builtin, since polynomial multiplication is carry-less and not
   expressible with '*'.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmul_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a * __b;
}

/* Polynomial (carry-less) multiply, 64-bit vector.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmul_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmulq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a * __b;
}

/* Polynomial (carry-less) multiply, 128-bit vector.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}
/* Bitwise AND, all element types, via the GNU vector '&' operator.
   The 64x1 forms operate on scalar typedefs.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vand_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vand_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vand_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vand_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vand_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vand_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vand_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vand_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vandq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vandq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vandq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vandq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vandq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vandq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vandq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vandq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & __b;
}
/* Bitwise inclusive OR, all element types, via the GNU vector '|'
   operator.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorr_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorr_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorr_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorr_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorr_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorr_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorr_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorr_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vorrq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vorrq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vorrq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vorrq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | __b;
}
/* Bitwise exclusive OR, all element types, via the GNU vector '^'
   operator.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
veor_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
veor_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
veor_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
veor_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
veor_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
veor_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
veor_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
veor_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
veorq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
veorq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
veorq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
veorq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
veorq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
veorq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
veorq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
veorq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a ^ __b;
}
/* Bitwise clear (BIC): __a AND NOT __b, via vector operators.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbic_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbic_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbic_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbic_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbic_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbic_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbic_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbic_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbicq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbicq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbicq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbicq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & ~__b;
}
/* Bitwise OR-NOT (ORN): __a OR NOT __b, via vector operators.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorn_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorn_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorn_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorn_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorn_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorn_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorn_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorn_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vornq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vornq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vornq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vornq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vornq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vornq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vornq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vornq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | ~__b;
}
/* Element-wise subtraction via the GNU vector '-' operator.  Integer
   lanes wrap modulo 2^N (vector arithmetic, not saturating).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsub_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vsub_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsubq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsubq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a - __b;
}
/* Widening subtraction (SSUBL/USUBL): subtract narrow vectors,
   producing a vector of double-width lanes.  Unsigned forms cast to
   the signed builtin signature.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}
/* Widening subtraction of the high halves (SSUBL2/USUBL2) of two
   128-bit vectors.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
/* Wide subtraction (SSUBW/USUBW): wide vector __a minus narrow vector
   __b, result has __a's lane width.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}
/* Wide subtraction using the high half of the narrow operand
   (SSUBW2/USUBW2).  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}
/* Saturating addition (SQADD/UQADD), 64-bit vector forms.  The 64x1
   variants map onto the scalar 'di' builtins since int64x1_t/uint64x1_t
   are scalar typedefs in this header.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
						 (int64x1_t) __b);
}
/* Saturating addition (SQADD/UQADD), 128-bit vector forms.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}
/* Saturating subtraction (SQSUB/UQSUB), 64-bit vector forms; the 64x1
   variants use the scalar 'di' builtins.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
						 (int64x1_t) __b);
}
/* Saturating subtraction (SQSUB/UQSUB), 128-bit vector forms.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}
2303 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2304 vqneg_s8 (int8x8_t __a)
2306 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2309 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2310 vqneg_s16 (int16x4_t __a)
2312 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2315 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2316 vqneg_s32 (int32x2_t __a)
2318 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2321 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2322 vqnegq_s8 (int8x16_t __a)
2324 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2327 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2328 vqnegq_s16 (int16x8_t __a)
2330 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2333 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2334 vqnegq_s32 (int32x4_t __a)
2336 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2339 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2340 vqabs_s8 (int8x8_t __a)
2342 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2345 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2346 vqabs_s16 (int16x4_t __a)
2348 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2351 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2352 vqabs_s32 (int32x2_t __a)
2354 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2357 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2358 vqabsq_s8 (int8x16_t __a)
2360 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2363 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2364 vqabsq_s16 (int16x8_t __a)
2366 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2369 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2370 vqabsq_s32 (int32x4_t __a)
2372 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2375 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2376 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2378 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2381 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2382 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2384 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2387 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2388 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2390 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2393 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2394 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2396 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2399 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2400 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2402 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2405 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2406 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2408 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2411 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2412 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2414 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2417 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2418 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2420 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2423 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2424 vcreate_s8 (uint64_t __a)
2426 return (int8x8_t) __a;
2429 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2430 vcreate_s16 (uint64_t __a)
2432 return (int16x4_t) __a;
2435 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2436 vcreate_s32 (uint64_t __a)
2438 return (int32x2_t) __a;
2441 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2442 vcreate_s64 (uint64_t __a)
2444 return (int64x1_t) __a;
2447 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2448 vcreate_f32 (uint64_t __a)
2450 return (float32x2_t) __a;
2453 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2454 vcreate_u8 (uint64_t __a)
2456 return (uint8x8_t) __a;
2459 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2460 vcreate_u16 (uint64_t __a)
2462 return (uint16x4_t) __a;
2465 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2466 vcreate_u32 (uint64_t __a)
2468 return (uint32x2_t) __a;
2471 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2472 vcreate_u64 (uint64_t __a)
2474 return (uint64x1_t) __a;
2477 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2478 vcreate_f64 (uint64_t __a)
2480 return (float64x1_t) __builtin_aarch64_createdf (__a);
2483 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2484 vcreate_p8 (uint64_t __a)
2486 return (poly8x8_t) __a;
2489 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2490 vcreate_p16 (uint64_t __a)
2492 return (poly16x4_t) __a;
2495 /* vget_lane */
2497 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2498 vget_lane_f32 (float32x2_t __a, const int __b)
2500 return __aarch64_vget_lane_f32 (__a, __b);
2503 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2504 vget_lane_f64 (float64x1_t __a, const int __b)
2506 return __aarch64_vget_lane_f64 (__a, __b);
2509 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2510 vget_lane_p8 (poly8x8_t __a, const int __b)
2512 return __aarch64_vget_lane_p8 (__a, __b);
2515 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2516 vget_lane_p16 (poly16x4_t __a, const int __b)
2518 return __aarch64_vget_lane_p16 (__a, __b);
2521 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2522 vget_lane_s8 (int8x8_t __a, const int __b)
2524 return __aarch64_vget_lane_s8 (__a, __b);
2527 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2528 vget_lane_s16 (int16x4_t __a, const int __b)
2530 return __aarch64_vget_lane_s16 (__a, __b);
2533 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2534 vget_lane_s32 (int32x2_t __a, const int __b)
2536 return __aarch64_vget_lane_s32 (__a, __b);
2539 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2540 vget_lane_s64 (int64x1_t __a, const int __b)
2542 return __aarch64_vget_lane_s64 (__a, __b);
2545 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2546 vget_lane_u8 (uint8x8_t __a, const int __b)
2548 return __aarch64_vget_lane_u8 (__a, __b);
2551 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2552 vget_lane_u16 (uint16x4_t __a, const int __b)
2554 return __aarch64_vget_lane_u16 (__a, __b);
2557 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2558 vget_lane_u32 (uint32x2_t __a, const int __b)
2560 return __aarch64_vget_lane_u32 (__a, __b);
2563 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2564 vget_lane_u64 (uint64x1_t __a, const int __b)
2566 return __aarch64_vget_lane_u64 (__a, __b);
2569 /* vgetq_lane */
2571 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2572 vgetq_lane_f32 (float32x4_t __a, const int __b)
2574 return __aarch64_vgetq_lane_f32 (__a, __b);
2577 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2578 vgetq_lane_f64 (float64x2_t __a, const int __b)
2580 return __aarch64_vgetq_lane_f64 (__a, __b);
2583 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2584 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2586 return __aarch64_vgetq_lane_p8 (__a, __b);
2589 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2590 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2592 return __aarch64_vgetq_lane_p16 (__a, __b);
2595 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2596 vgetq_lane_s8 (int8x16_t __a, const int __b)
2598 return __aarch64_vgetq_lane_s8 (__a, __b);
2601 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2602 vgetq_lane_s16 (int16x8_t __a, const int __b)
2604 return __aarch64_vgetq_lane_s16 (__a, __b);
2607 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2608 vgetq_lane_s32 (int32x4_t __a, const int __b)
2610 return __aarch64_vgetq_lane_s32 (__a, __b);
2613 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2614 vgetq_lane_s64 (int64x2_t __a, const int __b)
2616 return __aarch64_vgetq_lane_s64 (__a, __b);
2619 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2620 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2622 return __aarch64_vgetq_lane_u8 (__a, __b);
2625 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2626 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2628 return __aarch64_vgetq_lane_u16 (__a, __b);
2631 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2632 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2634 return __aarch64_vgetq_lane_u32 (__a, __b);
2637 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2638 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2640 return __aarch64_vgetq_lane_u64 (__a, __b);
2643 /* vreinterpret */
2645 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2646 vreinterpret_p8_s8 (int8x8_t __a)
2648 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
2651 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2652 vreinterpret_p8_s16 (int16x4_t __a)
2654 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
2657 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2658 vreinterpret_p8_s32 (int32x2_t __a)
2660 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
2663 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2664 vreinterpret_p8_s64 (int64x1_t __a)
2666 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
2669 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2670 vreinterpret_p8_f32 (float32x2_t __a)
2672 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
2675 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2676 vreinterpret_p8_u8 (uint8x8_t __a)
2678 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
2681 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2682 vreinterpret_p8_u16 (uint16x4_t __a)
2684 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2687 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2688 vreinterpret_p8_u32 (uint32x2_t __a)
2690 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
2693 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2694 vreinterpret_p8_u64 (uint64x1_t __a)
2696 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
2699 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2700 vreinterpret_p8_p16 (poly16x4_t __a)
2702 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2705 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2706 vreinterpretq_p8_s8 (int8x16_t __a)
2708 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
2711 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2712 vreinterpretq_p8_s16 (int16x8_t __a)
2714 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
2717 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2718 vreinterpretq_p8_s32 (int32x4_t __a)
2720 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
2723 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2724 vreinterpretq_p8_s64 (int64x2_t __a)
2726 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
2729 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2730 vreinterpretq_p8_f32 (float32x4_t __a)
2732 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
2735 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2736 vreinterpretq_p8_u8 (uint8x16_t __a)
2738 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
2739 __a);
2742 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2743 vreinterpretq_p8_u16 (uint16x8_t __a)
2745 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2746 __a);
2749 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2750 vreinterpretq_p8_u32 (uint32x4_t __a)
2752 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
2753 __a);
2756 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2757 vreinterpretq_p8_u64 (uint64x2_t __a)
2759 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
2760 __a);
2763 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2764 vreinterpretq_p8_p16 (poly16x8_t __a)
2766 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2767 __a);
2770 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2771 vreinterpret_p16_s8 (int8x8_t __a)
2773 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
2776 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2777 vreinterpret_p16_s16 (int16x4_t __a)
2779 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
2782 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2783 vreinterpret_p16_s32 (int32x2_t __a)
2785 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
2788 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2789 vreinterpret_p16_s64 (int64x1_t __a)
2791 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
2794 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2795 vreinterpret_p16_f32 (float32x2_t __a)
2797 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
2800 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2801 vreinterpret_p16_u8 (uint8x8_t __a)
2803 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2806 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2807 vreinterpret_p16_u16 (uint16x4_t __a)
2809 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
2812 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2813 vreinterpret_p16_u32 (uint32x2_t __a)
2815 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
2818 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2819 vreinterpret_p16_u64 (uint64x1_t __a)
2821 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
2824 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2825 vreinterpret_p16_p8 (poly8x8_t __a)
2827 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2830 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2831 vreinterpretq_p16_s8 (int8x16_t __a)
2833 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
2836 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2837 vreinterpretq_p16_s16 (int16x8_t __a)
2839 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
2842 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2843 vreinterpretq_p16_s32 (int32x4_t __a)
2845 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
2848 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2849 vreinterpretq_p16_s64 (int64x2_t __a)
2851 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
2854 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2855 vreinterpretq_p16_f32 (float32x4_t __a)
2857 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
2860 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2861 vreinterpretq_p16_u8 (uint8x16_t __a)
2863 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2864 __a);
2867 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2868 vreinterpretq_p16_u16 (uint16x8_t __a)
2870 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
2873 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2874 vreinterpretq_p16_u32 (uint32x4_t __a)
2876 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
2879 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2880 vreinterpretq_p16_u64 (uint64x2_t __a)
2882 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
2885 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2886 vreinterpretq_p16_p8 (poly8x16_t __a)
2888 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2889 __a);
2892 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2893 vreinterpret_f32_s8 (int8x8_t __a)
2895 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
2898 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2899 vreinterpret_f32_s16 (int16x4_t __a)
2901 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
2904 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2905 vreinterpret_f32_s32 (int32x2_t __a)
2907 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
2910 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2911 vreinterpret_f32_s64 (int64x1_t __a)
2913 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
2916 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2917 vreinterpret_f32_u8 (uint8x8_t __a)
2919 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2922 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2923 vreinterpret_f32_u16 (uint16x4_t __a)
2925 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2926 __a);
2929 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2930 vreinterpret_f32_u32 (uint32x2_t __a)
2932 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
2933 __a);
2936 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2937 vreinterpret_f32_u64 (uint64x1_t __a)
2939 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
2942 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2943 vreinterpret_f32_p8 (poly8x8_t __a)
2945 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2948 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2949 vreinterpret_f32_p16 (poly16x4_t __a)
2951 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2952 __a);
2955 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2956 vreinterpretq_f32_s8 (int8x16_t __a)
2958 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
2961 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2962 vreinterpretq_f32_s16 (int16x8_t __a)
2964 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
2967 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2968 vreinterpretq_f32_s32 (int32x4_t __a)
2970 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
2973 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2974 vreinterpretq_f32_s64 (int64x2_t __a)
2976 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
2979 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2980 vreinterpretq_f32_u8 (uint8x16_t __a)
2982 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2983 __a);
2986 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2987 vreinterpretq_f32_u16 (uint16x8_t __a)
2989 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2990 __a);
2993 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2994 vreinterpretq_f32_u32 (uint32x4_t __a)
2996 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
2997 __a);
3000 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3001 vreinterpretq_f32_u64 (uint64x2_t __a)
3003 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
3004 __a);
3007 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3008 vreinterpretq_f32_p8 (poly8x16_t __a)
3010 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
3011 __a);
3014 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3015 vreinterpretq_f32_p16 (poly16x8_t __a)
3017 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
3018 __a);
3021 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3022 vreinterpret_s64_s8 (int8x8_t __a)
3024 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3027 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3028 vreinterpret_s64_s16 (int16x4_t __a)
3030 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3033 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3034 vreinterpret_s64_s32 (int32x2_t __a)
3036 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3039 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3040 vreinterpret_s64_f32 (float32x2_t __a)
3042 return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3045 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3046 vreinterpret_s64_u8 (uint8x8_t __a)
3048 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3051 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3052 vreinterpret_s64_u16 (uint16x4_t __a)
3054 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3057 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3058 vreinterpret_s64_u32 (uint32x2_t __a)
3060 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3063 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3064 vreinterpret_s64_u64 (uint64x1_t __a)
3066 return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
3069 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3070 vreinterpret_s64_p8 (poly8x8_t __a)
3072 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3075 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3076 vreinterpret_s64_p16 (poly16x4_t __a)
3078 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3081 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3082 vreinterpretq_s64_s8 (int8x16_t __a)
3084 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3087 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3088 vreinterpretq_s64_s16 (int16x8_t __a)
3090 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3093 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3094 vreinterpretq_s64_s32 (int32x4_t __a)
3096 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3099 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3100 vreinterpretq_s64_f32 (float32x4_t __a)
3102 return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3105 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3106 vreinterpretq_s64_u8 (uint8x16_t __a)
3108 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3111 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3112 vreinterpretq_s64_u16 (uint16x8_t __a)
3114 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3117 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3118 vreinterpretq_s64_u32 (uint32x4_t __a)
3120 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3123 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3124 vreinterpretq_s64_u64 (uint64x2_t __a)
3126 return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
3129 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3130 vreinterpretq_s64_p8 (poly8x16_t __a)
3132 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3135 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3136 vreinterpretq_s64_p16 (poly16x8_t __a)
3138 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3141 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3142 vreinterpret_u64_s8 (int8x8_t __a)
3144 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3147 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3148 vreinterpret_u64_s16 (int16x4_t __a)
3150 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3153 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3154 vreinterpret_u64_s32 (int32x2_t __a)
3156 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3159 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3160 vreinterpret_u64_s64 (int64x1_t __a)
3162 return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
3165 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3166 vreinterpret_u64_f32 (float32x2_t __a)
3168 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3171 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3172 vreinterpret_u64_u8 (uint8x8_t __a)
3174 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3177 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3178 vreinterpret_u64_u16 (uint16x4_t __a)
3180 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3183 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3184 vreinterpret_u64_u32 (uint32x2_t __a)
3186 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3189 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3190 vreinterpret_u64_p8 (poly8x8_t __a)
3192 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3195 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3196 vreinterpret_u64_p16 (poly16x4_t __a)
3198 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3201 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3202 vreinterpretq_u64_s8 (int8x16_t __a)
3204 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3207 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3208 vreinterpretq_u64_s16 (int16x8_t __a)
3210 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3213 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3214 vreinterpretq_u64_s32 (int32x4_t __a)
3216 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3219 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3220 vreinterpretq_u64_s64 (int64x2_t __a)
3222 return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
3225 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3226 vreinterpretq_u64_f32 (float32x4_t __a)
3228 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3231 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3232 vreinterpretq_u64_u8 (uint8x16_t __a)
3234 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3235 __a);
3238 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3239 vreinterpretq_u64_u16 (uint16x8_t __a)
3241 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3244 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3245 vreinterpretq_u64_u32 (uint32x4_t __a)
3247 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3250 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3251 vreinterpretq_u64_p8 (poly8x16_t __a)
3253 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3254 __a);
3257 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3258 vreinterpretq_u64_p16 (poly16x8_t __a)
3260 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3263 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3264 vreinterpret_s8_s16 (int16x4_t __a)
3266 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3269 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3270 vreinterpret_s8_s32 (int32x2_t __a)
3272 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3275 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3276 vreinterpret_s8_s64 (int64x1_t __a)
3278 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3281 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3282 vreinterpret_s8_f32 (float32x2_t __a)
3284 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3287 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3288 vreinterpret_s8_u8 (uint8x8_t __a)
3290 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3293 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3294 vreinterpret_s8_u16 (uint16x4_t __a)
3296 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3299 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3300 vreinterpret_s8_u32 (uint32x2_t __a)
3302 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3305 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3306 vreinterpret_s8_u64 (uint64x1_t __a)
3308 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3311 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3312 vreinterpret_s8_p8 (poly8x8_t __a)
3314 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3317 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3318 vreinterpret_s8_p16 (poly16x4_t __a)
3320 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3323 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3324 vreinterpretq_s8_s16 (int16x8_t __a)
3326 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3329 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3330 vreinterpretq_s8_s32 (int32x4_t __a)
3332 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3335 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3336 vreinterpretq_s8_s64 (int64x2_t __a)
3338 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3341 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3342 vreinterpretq_s8_f32 (float32x4_t __a)
3344 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3347 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3348 vreinterpretq_s8_u8 (uint8x16_t __a)
3350 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3351 __a);
3354 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3355 vreinterpretq_s8_u16 (uint16x8_t __a)
3357 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3360 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3361 vreinterpretq_s8_u32 (uint32x4_t __a)
3363 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
3366 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3367 vreinterpretq_s8_u64 (uint64x2_t __a)
3369 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
3372 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3373 vreinterpretq_s8_p8 (poly8x16_t __a)
3375 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3376 __a);
3379 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3380 vreinterpretq_s8_p16 (poly16x8_t __a)
3382 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3385 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3386 vreinterpret_s16_s8 (int8x8_t __a)
3388 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3391 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3392 vreinterpret_s16_s32 (int32x2_t __a)
3394 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3397 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3398 vreinterpret_s16_s64 (int64x1_t __a)
3400 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3403 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3404 vreinterpret_s16_f32 (float32x2_t __a)
3406 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3409 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3410 vreinterpret_s16_u8 (uint8x8_t __a)
3412 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3415 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3416 vreinterpret_s16_u16 (uint16x4_t __a)
3418 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3421 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3422 vreinterpret_s16_u32 (uint32x2_t __a)
3424 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3427 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3428 vreinterpret_s16_u64 (uint64x1_t __a)
3430 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3433 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3434 vreinterpret_s16_p8 (poly8x8_t __a)
3436 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3439 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3440 vreinterpret_s16_p16 (poly16x4_t __a)
3442 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3445 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3446 vreinterpretq_s16_s8 (int8x16_t __a)
3448 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3451 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3452 vreinterpretq_s16_s32 (int32x4_t __a)
3454 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3457 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3458 vreinterpretq_s16_s64 (int64x2_t __a)
3460 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3463 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3464 vreinterpretq_s16_f32 (float32x4_t __a)
3466 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3469 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3470 vreinterpretq_s16_u8 (uint8x16_t __a)
3472 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3475 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3476 vreinterpretq_s16_u16 (uint16x8_t __a)
3478 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3481 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3482 vreinterpretq_s16_u32 (uint32x4_t __a)
3484 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3487 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3488 vreinterpretq_s16_u64 (uint64x2_t __a)
3490 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3493 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3494 vreinterpretq_s16_p8 (poly8x16_t __a)
3496 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3499 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3500 vreinterpretq_s16_p16 (poly16x8_t __a)
3502 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3505 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3506 vreinterpret_s32_s8 (int8x8_t __a)
3508 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3511 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3512 vreinterpret_s32_s16 (int16x4_t __a)
3514 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3517 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3518 vreinterpret_s32_s64 (int64x1_t __a)
3520 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3523 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3524 vreinterpret_s32_f32 (float32x2_t __a)
3526 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3529 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3530 vreinterpret_s32_u8 (uint8x8_t __a)
3532 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3535 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3536 vreinterpret_s32_u16 (uint16x4_t __a)
3538 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3541 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3542 vreinterpret_s32_u32 (uint32x2_t __a)
3544 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
3547 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3548 vreinterpret_s32_u64 (uint64x1_t __a)
3550 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3553 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3554 vreinterpret_s32_p8 (poly8x8_t __a)
3556 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3559 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3560 vreinterpret_s32_p16 (poly16x4_t __a)
3562 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3565 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3566 vreinterpretq_s32_s8 (int8x16_t __a)
3568 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3571 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3572 vreinterpretq_s32_s16 (int16x8_t __a)
3574 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3577 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3578 vreinterpretq_s32_s64 (int64x2_t __a)
3580 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3583 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3584 vreinterpretq_s32_f32 (float32x4_t __a)
3586 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3589 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3590 vreinterpretq_s32_u8 (uint8x16_t __a)
3592 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3595 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3596 vreinterpretq_s32_u16 (uint16x8_t __a)
3598 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3601 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3602 vreinterpretq_s32_u32 (uint32x4_t __a)
3604 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
3607 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3608 vreinterpretq_s32_u64 (uint64x2_t __a)
3610 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3613 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3614 vreinterpretq_s32_p8 (poly8x16_t __a)
3616 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3619 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3620 vreinterpretq_s32_p16 (poly16x8_t __a)
3622 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3625 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3626 vreinterpret_u8_s8 (int8x8_t __a)
3628 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
3631 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3632 vreinterpret_u8_s16 (int16x4_t __a)
3634 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3637 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3638 vreinterpret_u8_s32 (int32x2_t __a)
3640 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3643 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3644 vreinterpret_u8_s64 (int64x1_t __a)
3646 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3649 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3650 vreinterpret_u8_f32 (float32x2_t __a)
3652 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3655 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3656 vreinterpret_u8_u16 (uint16x4_t __a)
3658 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3661 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3662 vreinterpret_u8_u32 (uint32x2_t __a)
3664 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3667 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3668 vreinterpret_u8_u64 (uint64x1_t __a)
3670 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3673 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3674 vreinterpret_u8_p8 (poly8x8_t __a)
3676 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3679 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3680 vreinterpret_u8_p16 (poly16x4_t __a)
3682 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3685 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3686 vreinterpretq_u8_s8 (int8x16_t __a)
3688 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
3691 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3692 vreinterpretq_u8_s16 (int16x8_t __a)
3694 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3697 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3698 vreinterpretq_u8_s32 (int32x4_t __a)
3700 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3703 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3704 vreinterpretq_u8_s64 (int64x2_t __a)
3706 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3709 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3710 vreinterpretq_u8_f32 (float32x4_t __a)
3712 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3715 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3716 vreinterpretq_u8_u16 (uint16x8_t __a)
3718 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3719 __a);
3722 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3723 vreinterpretq_u8_u32 (uint32x4_t __a)
3725 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
3726 __a);
3729 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3730 vreinterpretq_u8_u64 (uint64x2_t __a)
3732 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
3733 __a);
3736 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3737 vreinterpretq_u8_p8 (poly8x16_t __a)
3739 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3740 __a);
3743 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3744 vreinterpretq_u8_p16 (poly16x8_t __a)
3746 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3747 __a);
3750 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3751 vreinterpret_u16_s8 (int8x8_t __a)
3753 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3756 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3757 vreinterpret_u16_s16 (int16x4_t __a)
3759 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
3762 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3763 vreinterpret_u16_s32 (int32x2_t __a)
3765 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3768 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3769 vreinterpret_u16_s64 (int64x1_t __a)
3771 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3774 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3775 vreinterpret_u16_f32 (float32x2_t __a)
3777 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3780 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3781 vreinterpret_u16_u8 (uint8x8_t __a)
3783 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3786 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3787 vreinterpret_u16_u32 (uint32x2_t __a)
3789 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3792 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3793 vreinterpret_u16_u64 (uint64x1_t __a)
3795 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3798 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3799 vreinterpret_u16_p8 (poly8x8_t __a)
3801 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3804 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3805 vreinterpret_u16_p16 (poly16x4_t __a)
3807 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3810 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3811 vreinterpretq_u16_s8 (int8x16_t __a)
3813 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3816 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3817 vreinterpretq_u16_s16 (int16x8_t __a)
3819 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
3822 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3823 vreinterpretq_u16_s32 (int32x4_t __a)
3825 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3828 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3829 vreinterpretq_u16_s64 (int64x2_t __a)
3831 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3835 vreinterpretq_u16_f32 (float32x4_t __a)
3837 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3840 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3841 vreinterpretq_u16_u8 (uint8x16_t __a)
3843 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3844 __a);
3847 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3848 vreinterpretq_u16_u32 (uint32x4_t __a)
3850 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3853 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3854 vreinterpretq_u16_u64 (uint64x2_t __a)
3856 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3859 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3860 vreinterpretq_u16_p8 (poly8x16_t __a)
3862 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3863 __a);
3866 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3867 vreinterpretq_u16_p16 (poly16x8_t __a)
3869 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3872 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3873 vreinterpret_u32_s8 (int8x8_t __a)
3875 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3878 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3879 vreinterpret_u32_s16 (int16x4_t __a)
3881 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3884 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3885 vreinterpret_u32_s32 (int32x2_t __a)
3887 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
3890 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3891 vreinterpret_u32_s64 (int64x1_t __a)
3893 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3896 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3897 vreinterpret_u32_f32 (float32x2_t __a)
3899 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3902 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3903 vreinterpret_u32_u8 (uint8x8_t __a)
3905 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3908 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3909 vreinterpret_u32_u16 (uint16x4_t __a)
3911 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3914 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3915 vreinterpret_u32_u64 (uint64x1_t __a)
3917 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3920 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3921 vreinterpret_u32_p8 (poly8x8_t __a)
3923 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3926 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3927 vreinterpret_u32_p16 (poly16x4_t __a)
3929 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3932 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3933 vreinterpretq_u32_s8 (int8x16_t __a)
3935 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3938 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3939 vreinterpretq_u32_s16 (int16x8_t __a)
3941 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3944 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3945 vreinterpretq_u32_s32 (int32x4_t __a)
3947 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
3950 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3951 vreinterpretq_u32_s64 (int64x2_t __a)
3953 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3956 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3957 vreinterpretq_u32_f32 (float32x4_t __a)
3959 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3962 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3963 vreinterpretq_u32_u8 (uint8x16_t __a)
3965 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3966 __a);
3969 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3970 vreinterpretq_u32_u16 (uint16x8_t __a)
3972 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3975 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3976 vreinterpretq_u32_u64 (uint64x2_t __a)
3978 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3981 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3982 vreinterpretq_u32_p8 (poly8x16_t __a)
3984 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3985 __a);
3988 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3989 vreinterpretq_u32_p16 (poly16x8_t __a)
3991 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
/* Helper used by the vget_low_* functions below: view the 128-bit
   argument __a as two 64-bit lanes, take lane 0, and view those bits as
   the requested 64-bit vector type.  Undefined with #undef once the
   vget_low_* definitions are complete.  */
#define __GET_LOW(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64_t lo = vgetq_lane_u64 (tmp, 0);  \
  return vreinterpret_##__TYPE##_u64 (lo);
3999 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4000 vget_low_f32 (float32x4_t __a)
4002 __GET_LOW (f32);
4005 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4006 vget_low_f64 (float64x2_t __a)
4008 return vgetq_lane_f64 (__a, 0);
4011 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4012 vget_low_p8 (poly8x16_t __a)
4014 __GET_LOW (p8);
4017 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4018 vget_low_p16 (poly16x8_t __a)
4020 __GET_LOW (p16);
4023 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4024 vget_low_s8 (int8x16_t __a)
4026 __GET_LOW (s8);
4029 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4030 vget_low_s16 (int16x8_t __a)
4032 __GET_LOW (s16);
4035 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4036 vget_low_s32 (int32x4_t __a)
4038 __GET_LOW (s32);
4041 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4042 vget_low_s64 (int64x2_t __a)
4044 return vgetq_lane_s64 (__a, 0);
4047 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4048 vget_low_u8 (uint8x16_t __a)
4050 __GET_LOW (u8);
4053 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4054 vget_low_u16 (uint16x8_t __a)
4056 __GET_LOW (u16);
4059 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4060 vget_low_u32 (uint32x4_t __a)
4062 __GET_LOW (u32);
4065 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4066 vget_low_u64 (uint64x2_t __a)
4068 return vgetq_lane_u64 (__a, 0);
4071 #undef __GET_LOW
4073 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4074 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4076 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4079 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4080 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4082 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4085 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4086 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4088 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4091 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4092 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4094 return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
4097 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4098 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4100 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4103 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4104 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4106 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4107 (int8x8_t) __b);
4110 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4111 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4113 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4114 (int16x4_t) __b);
4117 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4118 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4120 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4121 (int32x2_t) __b);
4124 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4125 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4127 return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
4128 (int64x1_t) __b);
4131 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4132 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4134 return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
4137 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4138 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4140 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4141 (int8x8_t) __b);
4144 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4145 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4147 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4148 (int16x4_t) __b);
4151 /* Start of temporary inline asm implementations. */
4153 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4154 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4156 int8x8_t result;
4157 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4158 : "=w"(result)
4159 : "0"(a), "w"(b), "w"(c)
4160 : /* No clobbers */);
4161 return result;
4164 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4165 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4167 int16x4_t result;
4168 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4169 : "=w"(result)
4170 : "0"(a), "w"(b), "w"(c)
4171 : /* No clobbers */);
4172 return result;
4175 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4176 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4178 int32x2_t result;
4179 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4180 : "=w"(result)
4181 : "0"(a), "w"(b), "w"(c)
4182 : /* No clobbers */);
4183 return result;
4186 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4187 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4189 uint8x8_t result;
4190 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4191 : "=w"(result)
4192 : "0"(a), "w"(b), "w"(c)
4193 : /* No clobbers */);
4194 return result;
4197 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4198 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4200 uint16x4_t result;
4201 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4202 : "=w"(result)
4203 : "0"(a), "w"(b), "w"(c)
4204 : /* No clobbers */);
4205 return result;
4208 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4209 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4211 uint32x2_t result;
4212 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4213 : "=w"(result)
4214 : "0"(a), "w"(b), "w"(c)
4215 : /* No clobbers */);
4216 return result;
4219 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4220 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4222 int16x8_t result;
4223 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4224 : "=w"(result)
4225 : "0"(a), "w"(b), "w"(c)
4226 : /* No clobbers */);
4227 return result;
4230 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4231 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4233 int32x4_t result;
4234 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4235 : "=w"(result)
4236 : "0"(a), "w"(b), "w"(c)
4237 : /* No clobbers */);
4238 return result;
4241 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4242 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4244 int64x2_t result;
4245 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4246 : "=w"(result)
4247 : "0"(a), "w"(b), "w"(c)
4248 : /* No clobbers */);
4249 return result;
4252 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4253 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4255 uint16x8_t result;
4256 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4257 : "=w"(result)
4258 : "0"(a), "w"(b), "w"(c)
4259 : /* No clobbers */);
4260 return result;
4263 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4264 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4266 uint32x4_t result;
4267 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4268 : "=w"(result)
4269 : "0"(a), "w"(b), "w"(c)
4270 : /* No clobbers */);
4271 return result;
4274 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4275 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4277 uint64x2_t result;
4278 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4279 : "=w"(result)
4280 : "0"(a), "w"(b), "w"(c)
4281 : /* No clobbers */);
4282 return result;
4285 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4286 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4288 int16x8_t result;
4289 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4290 : "=w"(result)
4291 : "0"(a), "w"(b), "w"(c)
4292 : /* No clobbers */);
4293 return result;
4296 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4297 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4299 int32x4_t result;
4300 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4301 : "=w"(result)
4302 : "0"(a), "w"(b), "w"(c)
4303 : /* No clobbers */);
4304 return result;
4307 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4308 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4310 int64x2_t result;
4311 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4312 : "=w"(result)
4313 : "0"(a), "w"(b), "w"(c)
4314 : /* No clobbers */);
4315 return result;
4318 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4319 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4321 uint16x8_t result;
4322 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4323 : "=w"(result)
4324 : "0"(a), "w"(b), "w"(c)
4325 : /* No clobbers */);
4326 return result;
4329 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4330 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4332 uint32x4_t result;
4333 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4334 : "=w"(result)
4335 : "0"(a), "w"(b), "w"(c)
4336 : /* No clobbers */);
4337 return result;
4340 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4341 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4343 uint64x2_t result;
4344 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4345 : "=w"(result)
4346 : "0"(a), "w"(b), "w"(c)
4347 : /* No clobbers */);
4348 return result;
4351 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4352 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4354 int8x16_t result;
4355 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4356 : "=w"(result)
4357 : "0"(a), "w"(b), "w"(c)
4358 : /* No clobbers */);
4359 return result;
4362 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4363 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4365 int16x8_t result;
4366 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4367 : "=w"(result)
4368 : "0"(a), "w"(b), "w"(c)
4369 : /* No clobbers */);
4370 return result;
4373 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4374 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4376 int32x4_t result;
4377 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4378 : "=w"(result)
4379 : "0"(a), "w"(b), "w"(c)
4380 : /* No clobbers */);
4381 return result;
4384 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4385 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4387 uint8x16_t result;
4388 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4389 : "=w"(result)
4390 : "0"(a), "w"(b), "w"(c)
4391 : /* No clobbers */);
4392 return result;
4395 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4396 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4398 uint16x8_t result;
4399 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4400 : "=w"(result)
4401 : "0"(a), "w"(b), "w"(c)
4402 : /* No clobbers */);
4403 return result;
4406 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4407 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4409 uint32x4_t result;
4410 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4411 : "=w"(result)
4412 : "0"(a), "w"(b), "w"(c)
4413 : /* No clobbers */);
4414 return result;
4417 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4418 vabd_f32 (float32x2_t a, float32x2_t b)
4420 float32x2_t result;
4421 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4422 : "=w"(result)
4423 : "w"(a), "w"(b)
4424 : /* No clobbers */);
4425 return result;
4428 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4429 vabd_s8 (int8x8_t a, int8x8_t b)
4431 int8x8_t result;
4432 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4433 : "=w"(result)
4434 : "w"(a), "w"(b)
4435 : /* No clobbers */);
4436 return result;
4439 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4440 vabd_s16 (int16x4_t a, int16x4_t b)
4442 int16x4_t result;
4443 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4444 : "=w"(result)
4445 : "w"(a), "w"(b)
4446 : /* No clobbers */);
4447 return result;
4450 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4451 vabd_s32 (int32x2_t a, int32x2_t b)
4453 int32x2_t result;
4454 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4455 : "=w"(result)
4456 : "w"(a), "w"(b)
4457 : /* No clobbers */);
4458 return result;
4461 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4462 vabd_u8 (uint8x8_t a, uint8x8_t b)
4464 uint8x8_t result;
4465 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4466 : "=w"(result)
4467 : "w"(a), "w"(b)
4468 : /* No clobbers */);
4469 return result;
4472 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4473 vabd_u16 (uint16x4_t a, uint16x4_t b)
4475 uint16x4_t result;
4476 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4477 : "=w"(result)
4478 : "w"(a), "w"(b)
4479 : /* No clobbers */);
4480 return result;
4483 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4484 vabd_u32 (uint32x2_t a, uint32x2_t b)
4486 uint32x2_t result;
4487 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4488 : "=w"(result)
4489 : "w"(a), "w"(b)
4490 : /* No clobbers */);
4491 return result;
4494 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4495 vabdd_f64 (float64_t a, float64_t b)
4497 float64_t result;
4498 __asm__ ("fabd %d0, %d1, %d2"
4499 : "=w"(result)
4500 : "w"(a), "w"(b)
4501 : /* No clobbers */);
4502 return result;
4505 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4506 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4508 int16x8_t result;
4509 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4510 : "=w"(result)
4511 : "w"(a), "w"(b)
4512 : /* No clobbers */);
4513 return result;
4516 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4517 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4519 int32x4_t result;
4520 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4521 : "=w"(result)
4522 : "w"(a), "w"(b)
4523 : /* No clobbers */);
4524 return result;
4527 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4528 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4530 int64x2_t result;
4531 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4532 : "=w"(result)
4533 : "w"(a), "w"(b)
4534 : /* No clobbers */);
4535 return result;
4538 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4539 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4541 uint16x8_t result;
4542 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4543 : "=w"(result)
4544 : "w"(a), "w"(b)
4545 : /* No clobbers */);
4546 return result;
4549 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4550 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4552 uint32x4_t result;
4553 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4554 : "=w"(result)
4555 : "w"(a), "w"(b)
4556 : /* No clobbers */);
4557 return result;
4560 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4561 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4563 uint64x2_t result;
4564 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4565 : "=w"(result)
4566 : "w"(a), "w"(b)
4567 : /* No clobbers */);
4568 return result;
4571 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4572 vabdl_s8 (int8x8_t a, int8x8_t b)
4574 int16x8_t result;
4575 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4576 : "=w"(result)
4577 : "w"(a), "w"(b)
4578 : /* No clobbers */);
4579 return result;
4582 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4583 vabdl_s16 (int16x4_t a, int16x4_t b)
4585 int32x4_t result;
4586 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4587 : "=w"(result)
4588 : "w"(a), "w"(b)
4589 : /* No clobbers */);
4590 return result;
4593 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4594 vabdl_s32 (int32x2_t a, int32x2_t b)
4596 int64x2_t result;
4597 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4598 : "=w"(result)
4599 : "w"(a), "w"(b)
4600 : /* No clobbers */);
4601 return result;
4604 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4605 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4607 uint16x8_t result;
4608 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4609 : "=w"(result)
4610 : "w"(a), "w"(b)
4611 : /* No clobbers */);
4612 return result;
4615 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4616 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4618 uint32x4_t result;
4619 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4620 : "=w"(result)
4621 : "w"(a), "w"(b)
4622 : /* No clobbers */);
4623 return result;
4626 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4627 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4629 uint64x2_t result;
4630 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4631 : "=w"(result)
4632 : "w"(a), "w"(b)
4633 : /* No clobbers */);
4634 return result;
4637 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4638 vabdq_f32 (float32x4_t a, float32x4_t b)
4640 float32x4_t result;
4641 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4642 : "=w"(result)
4643 : "w"(a), "w"(b)
4644 : /* No clobbers */);
4645 return result;
4648 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4649 vabdq_f64 (float64x2_t a, float64x2_t b)
4651 float64x2_t result;
4652 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4653 : "=w"(result)
4654 : "w"(a), "w"(b)
4655 : /* No clobbers */);
4656 return result;
4659 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4660 vabdq_s8 (int8x16_t a, int8x16_t b)
4662 int8x16_t result;
4663 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4664 : "=w"(result)
4665 : "w"(a), "w"(b)
4666 : /* No clobbers */);
4667 return result;
4670 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4671 vabdq_s16 (int16x8_t a, int16x8_t b)
4673 int16x8_t result;
4674 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4675 : "=w"(result)
4676 : "w"(a), "w"(b)
4677 : /* No clobbers */);
4678 return result;
4681 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4682 vabdq_s32 (int32x4_t a, int32x4_t b)
4684 int32x4_t result;
4685 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4686 : "=w"(result)
4687 : "w"(a), "w"(b)
4688 : /* No clobbers */);
4689 return result;
4692 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4693 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4695 uint8x16_t result;
4696 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4697 : "=w"(result)
4698 : "w"(a), "w"(b)
4699 : /* No clobbers */);
4700 return result;
4703 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4704 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4706 uint16x8_t result;
4707 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4708 : "=w"(result)
4709 : "w"(a), "w"(b)
4710 : /* No clobbers */);
4711 return result;
4714 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4715 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4717 uint32x4_t result;
4718 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4719 : "=w"(result)
4720 : "w"(a), "w"(b)
4721 : /* No clobbers */);
4722 return result;
4725 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4726 vabds_f32 (float32_t a, float32_t b)
4728 float32_t result;
4729 __asm__ ("fabd %s0, %s1, %s2"
4730 : "=w"(result)
4731 : "w"(a), "w"(b)
4732 : /* No clobbers */);
4733 return result;
4736 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4737 vaddlv_s8 (int8x8_t a)
4739 int16_t result;
4740 __asm__ ("saddlv %h0,%1.8b"
4741 : "=w"(result)
4742 : "w"(a)
4743 : /* No clobbers */);
4744 return result;
4747 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4748 vaddlv_s16 (int16x4_t a)
4750 int32_t result;
4751 __asm__ ("saddlv %s0,%1.4h"
4752 : "=w"(result)
4753 : "w"(a)
4754 : /* No clobbers */);
4755 return result;
4758 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4759 vaddlv_u8 (uint8x8_t a)
4761 uint16_t result;
4762 __asm__ ("uaddlv %h0,%1.8b"
4763 : "=w"(result)
4764 : "w"(a)
4765 : /* No clobbers */);
4766 return result;
4769 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4770 vaddlv_u16 (uint16x4_t a)
4772 uint32_t result;
4773 __asm__ ("uaddlv %s0,%1.4h"
4774 : "=w"(result)
4775 : "w"(a)
4776 : /* No clobbers */);
4777 return result;
4780 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4781 vaddlvq_s8 (int8x16_t a)
4783 int16_t result;
4784 __asm__ ("saddlv %h0,%1.16b"
4785 : "=w"(result)
4786 : "w"(a)
4787 : /* No clobbers */);
4788 return result;
4791 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4792 vaddlvq_s16 (int16x8_t a)
4794 int32_t result;
4795 __asm__ ("saddlv %s0,%1.8h"
4796 : "=w"(result)
4797 : "w"(a)
4798 : /* No clobbers */);
4799 return result;
4802 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
4803 vaddlvq_s32 (int32x4_t a)
4805 int64_t result;
4806 __asm__ ("saddlv %d0,%1.4s"
4807 : "=w"(result)
4808 : "w"(a)
4809 : /* No clobbers */);
4810 return result;
4813 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4814 vaddlvq_u8 (uint8x16_t a)
4816 uint16_t result;
4817 __asm__ ("uaddlv %h0,%1.16b"
4818 : "=w"(result)
4819 : "w"(a)
4820 : /* No clobbers */);
4821 return result;
4824 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4825 vaddlvq_u16 (uint16x8_t a)
4827 uint32_t result;
4828 __asm__ ("uaddlv %s0,%1.8h"
4829 : "=w"(result)
4830 : "w"(a)
4831 : /* No clobbers */);
4832 return result;
4835 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
4836 vaddlvq_u32 (uint32x4_t a)
4838 uint64_t result;
4839 __asm__ ("uaddlv %d0,%1.4s"
4840 : "=w"(result)
4841 : "w"(a)
4842 : /* No clobbers */);
4843 return result;
4846 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4847 vcls_s8 (int8x8_t a)
4849 int8x8_t result;
4850 __asm__ ("cls %0.8b,%1.8b"
4851 : "=w"(result)
4852 : "w"(a)
4853 : /* No clobbers */);
4854 return result;
4857 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4858 vcls_s16 (int16x4_t a)
4860 int16x4_t result;
4861 __asm__ ("cls %0.4h,%1.4h"
4862 : "=w"(result)
4863 : "w"(a)
4864 : /* No clobbers */);
4865 return result;
4868 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4869 vcls_s32 (int32x2_t a)
4871 int32x2_t result;
4872 __asm__ ("cls %0.2s,%1.2s"
4873 : "=w"(result)
4874 : "w"(a)
4875 : /* No clobbers */);
4876 return result;
4879 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4880 vclsq_s8 (int8x16_t a)
4882 int8x16_t result;
4883 __asm__ ("cls %0.16b,%1.16b"
4884 : "=w"(result)
4885 : "w"(a)
4886 : /* No clobbers */);
4887 return result;
4890 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4891 vclsq_s16 (int16x8_t a)
4893 int16x8_t result;
4894 __asm__ ("cls %0.8h,%1.8h"
4895 : "=w"(result)
4896 : "w"(a)
4897 : /* No clobbers */);
4898 return result;
4901 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4902 vclsq_s32 (int32x4_t a)
4904 int32x4_t result;
4905 __asm__ ("cls %0.4s,%1.4s"
4906 : "=w"(result)
4907 : "w"(a)
4908 : /* No clobbers */);
4909 return result;
4912 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4913 vcnt_p8 (poly8x8_t a)
4915 poly8x8_t result;
4916 __asm__ ("cnt %0.8b,%1.8b"
4917 : "=w"(result)
4918 : "w"(a)
4919 : /* No clobbers */);
4920 return result;
4923 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4924 vcnt_s8 (int8x8_t a)
4926 int8x8_t result;
4927 __asm__ ("cnt %0.8b,%1.8b"
4928 : "=w"(result)
4929 : "w"(a)
4930 : /* No clobbers */);
4931 return result;
4934 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4935 vcnt_u8 (uint8x8_t a)
4937 uint8x8_t result;
4938 __asm__ ("cnt %0.8b,%1.8b"
4939 : "=w"(result)
4940 : "w"(a)
4941 : /* No clobbers */);
4942 return result;
4945 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4946 vcntq_p8 (poly8x16_t a)
4948 poly8x16_t result;
4949 __asm__ ("cnt %0.16b,%1.16b"
4950 : "=w"(result)
4951 : "w"(a)
4952 : /* No clobbers */);
4953 return result;
4956 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4957 vcntq_s8 (int8x16_t a)
4959 int8x16_t result;
4960 __asm__ ("cnt %0.16b,%1.16b"
4961 : "=w"(result)
4962 : "w"(a)
4963 : /* No clobbers */);
4964 return result;
4967 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4968 vcntq_u8 (uint8x16_t a)
4970 uint8x16_t result;
4971 __asm__ ("cnt %0.16b,%1.16b"
4972 : "=w"(result)
4973 : "w"(a)
4974 : /* No clobbers */);
4975 return result;
/* Copy lane d of vector c into lane b of vector a (INS).  Macros because
   the lane indices must be immediates ("i" constraints).  */

#define vcopyq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t c_ = (c); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p8(a, b, c, d) \
  __extension__ \
    ({ \
       poly8x16_t c_ = (c); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p16(a, b, c, d) \
  __extension__ \
    ({ \
       poly16x8_t c_ = (c); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s8(a, b, c, d) \
  __extension__ \
    ({ \
       int8x16_t c_ = (c); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s64(a, b, c, d) \
  __extension__ \
    ({ \
       int64x2_t c_ = (c); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u8(a, b, c, d) \
  __extension__ \
    ({ \
       uint8x16_t c_ = (c); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u64(a, b, c, d) \
  __extension__ \
    ({ \
       uint64x2_t c_ = (c); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vcvt_f16_f32 not supported */

/* vcvt_f32_f16 not supported */

/* vcvt_high_f16_f32 not supported */

/* vcvt_high_f32_f16 not supported */
5142 static float32x2_t vdup_n_f32 (float32_t);
5144 #define vcvt_n_f32_s32(a, b) \
5145 __extension__ \
5146 ({ \
5147 int32x2_t a_ = (a); \
5148 float32x2_t result; \
5149 __asm__ ("scvtf %0.2s, %1.2s, #%2" \
5150 : "=w"(result) \
5151 : "w"(a_), "i"(b) \
5152 : /* No clobbers */); \
5153 result; \
5156 #define vcvt_n_f32_u32(a, b) \
5157 __extension__ \
5158 ({ \
5159 uint32x2_t a_ = (a); \
5160 float32x2_t result; \
5161 __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
5162 : "=w"(result) \
5163 : "w"(a_), "i"(b) \
5164 : /* No clobbers */); \
5165 result; \
5168 #define vcvt_n_s32_f32(a, b) \
5169 __extension__ \
5170 ({ \
5171 float32x2_t a_ = (a); \
5172 int32x2_t result; \
5173 __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
5174 : "=w"(result) \
5175 : "w"(a_), "i"(b) \
5176 : /* No clobbers */); \
5177 result; \
5180 #define vcvt_n_u32_f32(a, b) \
5181 __extension__ \
5182 ({ \
5183 float32x2_t a_ = (a); \
5184 uint32x2_t result; \
5185 __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
5186 : "=w"(result) \
5187 : "w"(a_), "i"(b) \
5188 : /* No clobbers */); \
5189 result; \
5192 #define vcvtd_n_f64_s64(a, b) \
5193 __extension__ \
5194 ({ \
5195 int64_t a_ = (a); \
5196 float64_t result; \
5197 __asm__ ("scvtf %d0,%d1,%2" \
5198 : "=w"(result) \
5199 : "w"(a_), "i"(b) \
5200 : /* No clobbers */); \
5201 result; \
5204 #define vcvtd_n_f64_u64(a, b) \
5205 __extension__ \
5206 ({ \
5207 uint64_t a_ = (a); \
5208 float64_t result; \
5209 __asm__ ("ucvtf %d0,%d1,%2" \
5210 : "=w"(result) \
5211 : "w"(a_), "i"(b) \
5212 : /* No clobbers */); \
5213 result; \
5216 #define vcvtd_n_s64_f64(a, b) \
5217 __extension__ \
5218 ({ \
5219 float64_t a_ = (a); \
5220 int64_t result; \
5221 __asm__ ("fcvtzs %d0,%d1,%2" \
5222 : "=w"(result) \
5223 : "w"(a_), "i"(b) \
5224 : /* No clobbers */); \
5225 result; \
5228 #define vcvtd_n_u64_f64(a, b) \
5229 __extension__ \
5230 ({ \
5231 float64_t a_ = (a); \
5232 uint64_t result; \
5233 __asm__ ("fcvtzu %d0,%d1,%2" \
5234 : "=w"(result) \
5235 : "w"(a_), "i"(b) \
5236 : /* No clobbers */); \
5237 result; \
5240 #define vcvtq_n_f32_s32(a, b) \
5241 __extension__ \
5242 ({ \
5243 int32x4_t a_ = (a); \
5244 float32x4_t result; \
5245 __asm__ ("scvtf %0.4s, %1.4s, #%2" \
5246 : "=w"(result) \
5247 : "w"(a_), "i"(b) \
5248 : /* No clobbers */); \
5249 result; \
5252 #define vcvtq_n_f32_u32(a, b) \
5253 __extension__ \
5254 ({ \
5255 uint32x4_t a_ = (a); \
5256 float32x4_t result; \
5257 __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
5258 : "=w"(result) \
5259 : "w"(a_), "i"(b) \
5260 : /* No clobbers */); \
5261 result; \
5264 #define vcvtq_n_f64_s64(a, b) \
5265 __extension__ \
5266 ({ \
5267 int64x2_t a_ = (a); \
5268 float64x2_t result; \
5269 __asm__ ("scvtf %0.2d, %1.2d, #%2" \
5270 : "=w"(result) \
5271 : "w"(a_), "i"(b) \
5272 : /* No clobbers */); \
5273 result; \
5276 #define vcvtq_n_f64_u64(a, b) \
5277 __extension__ \
5278 ({ \
5279 uint64x2_t a_ = (a); \
5280 float64x2_t result; \
5281 __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
5282 : "=w"(result) \
5283 : "w"(a_), "i"(b) \
5284 : /* No clobbers */); \
5285 result; \
5288 #define vcvtq_n_s32_f32(a, b) \
5289 __extension__ \
5290 ({ \
5291 float32x4_t a_ = (a); \
5292 int32x4_t result; \
5293 __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
5294 : "=w"(result) \
5295 : "w"(a_), "i"(b) \
5296 : /* No clobbers */); \
5297 result; \
5300 #define vcvtq_n_s64_f64(a, b) \
5301 __extension__ \
5302 ({ \
5303 float64x2_t a_ = (a); \
5304 int64x2_t result; \
5305 __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
5306 : "=w"(result) \
5307 : "w"(a_), "i"(b) \
5308 : /* No clobbers */); \
5309 result; \
5312 #define vcvtq_n_u32_f32(a, b) \
5313 __extension__ \
5314 ({ \
5315 float32x4_t a_ = (a); \
5316 uint32x4_t result; \
5317 __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
5318 : "=w"(result) \
5319 : "w"(a_), "i"(b) \
5320 : /* No clobbers */); \
5321 result; \
5324 #define vcvtq_n_u64_f64(a, b) \
5325 __extension__ \
5326 ({ \
5327 float64x2_t a_ = (a); \
5328 uint64x2_t result; \
5329 __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
5330 : "=w"(result) \
5331 : "w"(a_), "i"(b) \
5332 : /* No clobbers */); \
5333 result; \
5336 #define vcvts_n_f32_s32(a, b) \
5337 __extension__ \
5338 ({ \
5339 int32_t a_ = (a); \
5340 float32_t result; \
5341 __asm__ ("scvtf %s0,%s1,%2" \
5342 : "=w"(result) \
5343 : "w"(a_), "i"(b) \
5344 : /* No clobbers */); \
5345 result; \
5348 #define vcvts_n_f32_u32(a, b) \
5349 __extension__ \
5350 ({ \
5351 uint32_t a_ = (a); \
5352 float32_t result; \
5353 __asm__ ("ucvtf %s0,%s1,%2" \
5354 : "=w"(result) \
5355 : "w"(a_), "i"(b) \
5356 : /* No clobbers */); \
5357 result; \
5360 #define vcvts_n_s32_f32(a, b) \
5361 __extension__ \
5362 ({ \
5363 float32_t a_ = (a); \
5364 int32_t result; \
5365 __asm__ ("fcvtzs %s0,%s1,%2" \
5366 : "=w"(result) \
5367 : "w"(a_), "i"(b) \
5368 : /* No clobbers */); \
5369 result; \
5372 #define vcvts_n_u32_f32(a, b) \
5373 __extension__ \
5374 ({ \
5375 float32_t a_ = (a); \
5376 uint32_t result; \
5377 __asm__ ("fcvtzu %s0,%s1,%2" \
5378 : "=w"(result) \
5379 : "w"(a_), "i"(b) \
5380 : /* No clobbers */); \
5381 result; \
5384 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5385 vcvtx_f32_f64 (float64x2_t a)
5387 float32x2_t result;
5388 __asm__ ("fcvtxn %0.2s,%1.2d"
5389 : "=w"(result)
5390 : "w"(a)
5391 : /* No clobbers */);
5392 return result;
5395 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5396 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5398 float32x4_t result;
5399 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5400 : "=w"(result)
5401 : "w" (b), "0"(a)
5402 : /* No clobbers */);
5403 return result;
5406 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5407 vcvtxd_f32_f64 (float64_t a)
5409 float32_t result;
5410 __asm__ ("fcvtxn %s0,%d1"
5411 : "=w"(result)
5412 : "w"(a)
5413 : /* No clobbers */);
5414 return result;
/* Vector extract (EXT), 64-bit vectors: concatenate a and b and take a
   window starting at element c.  The immediate in the asm is scaled by
   the element size in bytes (#%3*N); macros because c must be an
   immediate.  */

#define vext_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64x1_t a_ = (a); \
       float64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64x1_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vext_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64x1_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Vector extract (EXT), 128-bit vectors; immediate scaled by element
   size in bytes.  */

#define vextq_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vextq_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
5729 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5730 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5732 float32x2_t result;
5733 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
5734 : "=w"(result)
5735 : "0"(a), "w"(b), "w"(c)
5736 : /* No clobbers */);
5737 return result;
5740 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5741 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5743 float32x4_t result;
5744 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
5745 : "=w"(result)
5746 : "0"(a), "w"(b), "w"(c)
5747 : /* No clobbers */);
5748 return result;
5751 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5752 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5754 float64x2_t result;
5755 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
5756 : "=w"(result)
5757 : "0"(a), "w"(b), "w"(c)
5758 : /* No clobbers */);
5759 return result;
5762 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5763 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5765 float32x2_t result;
5766 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
5767 : "=w"(result)
5768 : "0"(a), "w"(b), "w"(c)
5769 : /* No clobbers */);
5770 return result;
5773 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5774 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
5776 float32x4_t result;
5777 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
5778 : "=w"(result)
5779 : "0"(a), "w"(b), "w"(c)
5780 : /* No clobbers */);
5781 return result;
5784 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5785 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
5787 float64x2_t result;
5788 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
5789 : "=w"(result)
5790 : "0"(a), "w"(b), "w"(c)
5791 : /* No clobbers */);
5792 return result;
5795 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5796 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5798 float32x2_t result;
5799 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
5800 : "=w"(result)
5801 : "0"(a), "w"(b), "w"(c)
5802 : /* No clobbers */);
5803 return result;
5806 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5807 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5809 float32x4_t result;
5810 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
5811 : "=w"(result)
5812 : "0"(a), "w"(b), "w"(c)
5813 : /* No clobbers */);
5814 return result;
5817 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5818 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5820 float64x2_t result;
5821 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
5822 : "=w"(result)
5823 : "0"(a), "w"(b), "w"(c)
5824 : /* No clobbers */);
5825 return result;
5828 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5829 vget_high_f32 (float32x4_t a)
5831 float32x2_t result;
5832 __asm__ ("ins %0.d[0], %1.d[1]"
5833 : "=w"(result)
5834 : "w"(a)
5835 : /* No clobbers */);
5836 return result;
5839 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5840 vget_high_f64 (float64x2_t a)
5842 float64x1_t result;
5843 __asm__ ("ins %0.d[0], %1.d[1]"
5844 : "=w"(result)
5845 : "w"(a)
5846 : /* No clobbers */);
5847 return result;
5850 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5851 vget_high_p8 (poly8x16_t a)
5853 poly8x8_t result;
5854 __asm__ ("ins %0.d[0], %1.d[1]"
5855 : "=w"(result)
5856 : "w"(a)
5857 : /* No clobbers */);
5858 return result;
5861 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5862 vget_high_p16 (poly16x8_t a)
5864 poly16x4_t result;
5865 __asm__ ("ins %0.d[0], %1.d[1]"
5866 : "=w"(result)
5867 : "w"(a)
5868 : /* No clobbers */);
5869 return result;
5872 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5873 vget_high_s8 (int8x16_t a)
5875 int8x8_t result;
5876 __asm__ ("ins %0.d[0], %1.d[1]"
5877 : "=w"(result)
5878 : "w"(a)
5879 : /* No clobbers */);
5880 return result;
5883 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5884 vget_high_s16 (int16x8_t a)
5886 int16x4_t result;
5887 __asm__ ("ins %0.d[0], %1.d[1]"
5888 : "=w"(result)
5889 : "w"(a)
5890 : /* No clobbers */);
5891 return result;
5894 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5895 vget_high_s32 (int32x4_t a)
5897 int32x2_t result;
5898 __asm__ ("ins %0.d[0], %1.d[1]"
5899 : "=w"(result)
5900 : "w"(a)
5901 : /* No clobbers */);
5902 return result;
5905 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5906 vget_high_s64 (int64x2_t a)
5908 int64x1_t result;
5909 __asm__ ("ins %0.d[0], %1.d[1]"
5910 : "=w"(result)
5911 : "w"(a)
5912 : /* No clobbers */);
5913 return result;
5916 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5917 vget_high_u8 (uint8x16_t a)
5919 uint8x8_t result;
5920 __asm__ ("ins %0.d[0], %1.d[1]"
5921 : "=w"(result)
5922 : "w"(a)
5923 : /* No clobbers */);
5924 return result;
5927 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5928 vget_high_u16 (uint16x8_t a)
5930 uint16x4_t result;
5931 __asm__ ("ins %0.d[0], %1.d[1]"
5932 : "=w"(result)
5933 : "w"(a)
5934 : /* No clobbers */);
5935 return result;
5938 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5939 vget_high_u32 (uint32x4_t a)
5941 uint32x2_t result;
5942 __asm__ ("ins %0.d[0], %1.d[1]"
5943 : "=w"(result)
5944 : "w"(a)
5945 : /* No clobbers */);
5946 return result;
5949 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5950 vget_high_u64 (uint64x2_t a)
5952 uint64x1_t result;
5953 __asm__ ("ins %0.d[0], %1.d[1]"
5954 : "=w"(result)
5955 : "w"(a)
5956 : /* No clobbers */);
5957 return result;
5960 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5961 vhsub_s8 (int8x8_t a, int8x8_t b)
5963 int8x8_t result;
5964 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
5965 : "=w"(result)
5966 : "w"(a), "w"(b)
5967 : /* No clobbers */);
5968 return result;
5971 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5972 vhsub_s16 (int16x4_t a, int16x4_t b)
5974 int16x4_t result;
5975 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
5976 : "=w"(result)
5977 : "w"(a), "w"(b)
5978 : /* No clobbers */);
5979 return result;
5982 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5983 vhsub_s32 (int32x2_t a, int32x2_t b)
5985 int32x2_t result;
5986 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
5987 : "=w"(result)
5988 : "w"(a), "w"(b)
5989 : /* No clobbers */);
5990 return result;
5993 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5994 vhsub_u8 (uint8x8_t a, uint8x8_t b)
5996 uint8x8_t result;
5997 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
5998 : "=w"(result)
5999 : "w"(a), "w"(b)
6000 : /* No clobbers */);
6001 return result;
6004 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6005 vhsub_u16 (uint16x4_t a, uint16x4_t b)
6007 uint16x4_t result;
6008 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6009 : "=w"(result)
6010 : "w"(a), "w"(b)
6011 : /* No clobbers */);
6012 return result;
6015 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6016 vhsub_u32 (uint32x2_t a, uint32x2_t b)
6018 uint32x2_t result;
6019 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6020 : "=w"(result)
6021 : "w"(a), "w"(b)
6022 : /* No clobbers */);
6023 return result;
6026 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6027 vhsubq_s8 (int8x16_t a, int8x16_t b)
6029 int8x16_t result;
6030 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6031 : "=w"(result)
6032 : "w"(a), "w"(b)
6033 : /* No clobbers */);
6034 return result;
6037 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6038 vhsubq_s16 (int16x8_t a, int16x8_t b)
6040 int16x8_t result;
6041 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
6042 : "=w"(result)
6043 : "w"(a), "w"(b)
6044 : /* No clobbers */);
6045 return result;
6048 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6049 vhsubq_s32 (int32x4_t a, int32x4_t b)
6051 int32x4_t result;
6052 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
6053 : "=w"(result)
6054 : "w"(a), "w"(b)
6055 : /* No clobbers */);
6056 return result;
6059 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6060 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6062 uint8x16_t result;
6063 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6064 : "=w"(result)
6065 : "w"(a), "w"(b)
6066 : /* No clobbers */);
6067 return result;
6070 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6071 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6073 uint16x8_t result;
6074 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6075 : "=w"(result)
6076 : "w"(a), "w"(b)
6077 : /* No clobbers */);
6078 return result;
6081 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6082 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6084 uint32x4_t result;
6085 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6086 : "=w"(result)
6087 : "w"(a), "w"(b)
6088 : /* No clobbers */);
6089 return result;
6092 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6093 vld1_dup_f32 (const float32_t * a)
6095 float32x2_t result;
6096 __asm__ ("ld1r {%0.2s}, %1"
6097 : "=w"(result)
6098 : "Utv"(*a)
6099 : /* No clobbers */);
6100 return result;
6103 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6104 vld1_dup_f64 (const float64_t * a)
6106 float64x1_t result;
6107 __asm__ ("ld1r {%0.1d}, %1"
6108 : "=w"(result)
6109 : "Utv"(*a)
6110 : /* No clobbers */);
6111 return result;
6114 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6115 vld1_dup_p8 (const poly8_t * a)
6117 poly8x8_t result;
6118 __asm__ ("ld1r {%0.8b}, %1"
6119 : "=w"(result)
6120 : "Utv"(*a)
6121 : /* No clobbers */);
6122 return result;
6125 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6126 vld1_dup_p16 (const poly16_t * a)
6128 poly16x4_t result;
6129 __asm__ ("ld1r {%0.4h}, %1"
6130 : "=w"(result)
6131 : "Utv"(*a)
6132 : /* No clobbers */);
6133 return result;
6136 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6137 vld1_dup_s8 (const int8_t * a)
6139 int8x8_t result;
6140 __asm__ ("ld1r {%0.8b}, %1"
6141 : "=w"(result)
6142 : "Utv"(*a)
6143 : /* No clobbers */);
6144 return result;
6147 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6148 vld1_dup_s16 (const int16_t * a)
6150 int16x4_t result;
6151 __asm__ ("ld1r {%0.4h}, %1"
6152 : "=w"(result)
6153 : "Utv"(*a)
6154 : /* No clobbers */);
6155 return result;
6158 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6159 vld1_dup_s32 (const int32_t * a)
6161 int32x2_t result;
6162 __asm__ ("ld1r {%0.2s}, %1"
6163 : "=w"(result)
6164 : "Utv"(*a)
6165 : /* No clobbers */);
6166 return result;
6169 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6170 vld1_dup_s64 (const int64_t * a)
6172 int64x1_t result;
6173 __asm__ ("ld1r {%0.1d}, %1"
6174 : "=w"(result)
6175 : "Utv"(*a)
6176 : /* No clobbers */);
6177 return result;
6180 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6181 vld1_dup_u8 (const uint8_t * a)
6183 uint8x8_t result;
6184 __asm__ ("ld1r {%0.8b}, %1"
6185 : "=w"(result)
6186 : "Utv"(*a)
6187 : /* No clobbers */);
6188 return result;
6191 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6192 vld1_dup_u16 (const uint16_t * a)
6194 uint16x4_t result;
6195 __asm__ ("ld1r {%0.4h}, %1"
6196 : "=w"(result)
6197 : "Utv"(*a)
6198 : /* No clobbers */);
6199 return result;
6202 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6203 vld1_dup_u32 (const uint32_t * a)
6205 uint32x2_t result;
6206 __asm__ ("ld1r {%0.2s}, %1"
6207 : "=w"(result)
6208 : "Utv"(*a)
6209 : /* No clobbers */);
6210 return result;
6213 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6214 vld1_dup_u64 (const uint64_t * a)
6216 uint64x1_t result;
6217 __asm__ ("ld1r {%0.1d}, %1"
6218 : "=w"(result)
6219 : "Utv"(*a)
6220 : /* No clobbers */);
6221 return result;
/* vld1_lane family: load one element from *A into lane C of vector B,
   returning the merged vector.  The lane index must be a compile-time
   constant; B is tied to the output via "0" so untouched lanes are
   preserved.  */

#define vld1_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       const float32_t * a_ = (a); \
       float32x2_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       const float64_t * a_ = (a); \
       float64x1_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       const poly8_t * a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       const poly16_t * a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       const int8_t * a_ = (a); \
       int8x8_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       const int16_t * a_ = (a); \
       int16x4_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       const int32_t * a_ = (a); \
       int32x2_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       const int64_t * a_ = (a); \
       int64x1_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       const uint8_t * a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       const uint16_t * a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       const uint32_t * a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       const uint64_t * a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
6380 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6381 vld1q_dup_f32 (const float32_t * a)
6383 float32x4_t result;
6384 __asm__ ("ld1r {%0.4s}, %1"
6385 : "=w"(result)
6386 : "Utv"(*a)
6387 : /* No clobbers */);
6388 return result;
6391 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6392 vld1q_dup_f64 (const float64_t * a)
6394 float64x2_t result;
6395 __asm__ ("ld1r {%0.2d}, %1"
6396 : "=w"(result)
6397 : "Utv"(*a)
6398 : /* No clobbers */);
6399 return result;
6402 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6403 vld1q_dup_p8 (const poly8_t * a)
6405 poly8x16_t result;
6406 __asm__ ("ld1r {%0.16b}, %1"
6407 : "=w"(result)
6408 : "Utv"(*a)
6409 : /* No clobbers */);
6410 return result;
6413 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6414 vld1q_dup_p16 (const poly16_t * a)
6416 poly16x8_t result;
6417 __asm__ ("ld1r {%0.8h}, %1"
6418 : "=w"(result)
6419 : "Utv"(*a)
6420 : /* No clobbers */);
6421 return result;
6424 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6425 vld1q_dup_s8 (const int8_t * a)
6427 int8x16_t result;
6428 __asm__ ("ld1r {%0.16b}, %1"
6429 : "=w"(result)
6430 : "Utv"(*a)
6431 : /* No clobbers */);
6432 return result;
6435 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6436 vld1q_dup_s16 (const int16_t * a)
6438 int16x8_t result;
6439 __asm__ ("ld1r {%0.8h}, %1"
6440 : "=w"(result)
6441 : "Utv"(*a)
6442 : /* No clobbers */);
6443 return result;
6446 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6447 vld1q_dup_s32 (const int32_t * a)
6449 int32x4_t result;
6450 __asm__ ("ld1r {%0.4s}, %1"
6451 : "=w"(result)
6452 : "Utv"(*a)
6453 : /* No clobbers */);
6454 return result;
6457 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6458 vld1q_dup_s64 (const int64_t * a)
6460 int64x2_t result;
6461 __asm__ ("ld1r {%0.2d}, %1"
6462 : "=w"(result)
6463 : "Utv"(*a)
6464 : /* No clobbers */);
6465 return result;
6468 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6469 vld1q_dup_u8 (const uint8_t * a)
6471 uint8x16_t result;
6472 __asm__ ("ld1r {%0.16b}, %1"
6473 : "=w"(result)
6474 : "Utv"(*a)
6475 : /* No clobbers */);
6476 return result;
6479 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6480 vld1q_dup_u16 (const uint16_t * a)
6482 uint16x8_t result;
6483 __asm__ ("ld1r {%0.8h}, %1"
6484 : "=w"(result)
6485 : "Utv"(*a)
6486 : /* No clobbers */);
6487 return result;
6490 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6491 vld1q_dup_u32 (const uint32_t * a)
6493 uint32x4_t result;
6494 __asm__ ("ld1r {%0.4s}, %1"
6495 : "=w"(result)
6496 : "Utv"(*a)
6497 : /* No clobbers */);
6498 return result;
6501 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6502 vld1q_dup_u64 (const uint64_t * a)
6504 uint64x2_t result;
6505 __asm__ ("ld1r {%0.2d}, %1"
6506 : "=w"(result)
6507 : "Utv"(*a)
6508 : /* No clobbers */);
6509 return result;
/* vld1q_lane family: 128-bit counterpart of vld1_lane — load one element
   from *A into constant lane C of vector B, preserving the other lanes.  */

#define vld1q_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       const float32_t * a_ = (a); \
       float32x4_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       const float64_t * a_ = (a); \
       float64x2_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       const poly8_t * a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       const poly16_t * a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       const int8_t * a_ = (a); \
       int8x16_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       const int16_t * a_ = (a); \
       int16x8_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       const int32_t * a_ = (a); \
       int32x4_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       const int64_t * a_ = (a); \
       int64x2_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       const uint8_t * a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       const uint16_t * a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       const uint32_t * a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       const uint64_t * a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
6668 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6669 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6671 float32x2_t result;
6672 float32x2_t t1;
6673 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
6674 : "=w"(result), "=w"(t1)
6675 : "0"(a), "w"(b), "w"(c)
6676 : /* No clobbers */);
6677 return result;
6680 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6681 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6683 int16x4_t result;
6684 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6685 : "=w"(result)
6686 : "0"(a), "w"(b), "x"(c)
6687 : /* No clobbers */);
6688 return result;
6691 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6692 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6694 int32x2_t result;
6695 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6696 : "=w"(result)
6697 : "0"(a), "w"(b), "w"(c)
6698 : /* No clobbers */);
6699 return result;
6702 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6703 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6705 uint16x4_t result;
6706 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6707 : "=w"(result)
6708 : "0"(a), "w"(b), "x"(c)
6709 : /* No clobbers */);
6710 return result;
6713 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6714 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6716 uint32x2_t result;
6717 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6718 : "=w"(result)
6719 : "0"(a), "w"(b), "w"(c)
6720 : /* No clobbers */);
6721 return result;
6724 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6725 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6727 int8x8_t result;
6728 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6729 : "=w"(result)
6730 : "0"(a), "w"(b), "w"(c)
6731 : /* No clobbers */);
6732 return result;
6735 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6736 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6738 int16x4_t result;
6739 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6740 : "=w"(result)
6741 : "0"(a), "w"(b), "w"(c)
6742 : /* No clobbers */);
6743 return result;
6746 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6747 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6749 int32x2_t result;
6750 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6751 : "=w"(result)
6752 : "0"(a), "w"(b), "w"(c)
6753 : /* No clobbers */);
6754 return result;
6757 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6758 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6760 uint8x8_t result;
6761 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6762 : "=w"(result)
6763 : "0"(a), "w"(b), "w"(c)
6764 : /* No clobbers */);
6765 return result;
6768 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6769 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6771 uint16x4_t result;
6772 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6773 : "=w"(result)
6774 : "0"(a), "w"(b), "w"(c)
6775 : /* No clobbers */);
6776 return result;
6779 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6780 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6782 uint32x2_t result;
6783 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6784 : "=w"(result)
6785 : "0"(a), "w"(b), "w"(c)
6786 : /* No clobbers */);
6787 return result;
/* vmlal_high_lane / _laneq: widening multiply-accumulate using the HIGH
   half of B (SMLAL2/UMLAL2) against lane D of C.
   Conformance fix: ACLE specifies a 64-bit lane vector for the
   non-"laneq" forms (e.g. int16x4_t for vmlal_high_lane_s16); the
   previous definitions wrongly used the 128-bit types and so rejected
   conforming callers.  The "laneq" forms correctly keep 128-bit C.  */

#define vmlal_high_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
6902 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6903 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6905 int32x4_t result;
6906 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6907 : "=w"(result)
6908 : "0"(a), "w"(b), "x"(c)
6909 : /* No clobbers */);
6910 return result;
6913 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6914 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6916 int64x2_t result;
6917 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6918 : "=w"(result)
6919 : "0"(a), "w"(b), "w"(c)
6920 : /* No clobbers */);
6921 return result;
6924 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6925 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6927 uint32x4_t result;
6928 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6929 : "=w"(result)
6930 : "0"(a), "w"(b), "x"(c)
6931 : /* No clobbers */);
6932 return result;
6935 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6936 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6938 uint64x2_t result;
6939 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6940 : "=w"(result)
6941 : "0"(a), "w"(b), "w"(c)
6942 : /* No clobbers */);
6943 return result;
6946 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6947 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6949 int16x8_t result;
6950 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6951 : "=w"(result)
6952 : "0"(a), "w"(b), "w"(c)
6953 : /* No clobbers */);
6954 return result;
6957 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6958 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6960 int32x4_t result;
6961 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6962 : "=w"(result)
6963 : "0"(a), "w"(b), "w"(c)
6964 : /* No clobbers */);
6965 return result;
6968 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6969 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6971 int64x2_t result;
6972 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6973 : "=w"(result)
6974 : "0"(a), "w"(b), "w"(c)
6975 : /* No clobbers */);
6976 return result;
6979 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6980 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6982 uint16x8_t result;
6983 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6984 : "=w"(result)
6985 : "0"(a), "w"(b), "w"(c)
6986 : /* No clobbers */);
6987 return result;
6990 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6991 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6993 uint32x4_t result;
6994 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6995 : "=w"(result)
6996 : "0"(a), "w"(b), "w"(c)
6997 : /* No clobbers */);
6998 return result;
7001 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7002 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7004 uint64x2_t result;
7005 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
7006 : "=w"(result)
7007 : "0"(a), "w"(b), "w"(c)
7008 : /* No clobbers */);
7009 return result;
/* Widening multiply-accumulate by one lane of a 64-bit vector:
   a + (b * c[d]).  Macros (not functions) so the lane number D can be
   passed to the "i" immediate constraint.  "x" on 16-bit element operands
   restricts them to V0-V15 as required by .h[n] lane addressing.  */

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* As vmlal_lane_* but the lane source C is a 128-bit ("q") vector, so the
   lane index D may address the wider element range.  */

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7124 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7125 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7127 int32x4_t result;
7128 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
7129 : "=w"(result)
7130 : "0"(a), "w"(b), "x"(c)
7131 : /* No clobbers */);
7132 return result;
7135 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7136 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7138 int64x2_t result;
7139 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
7140 : "=w"(result)
7141 : "0"(a), "w"(b), "w"(c)
7142 : /* No clobbers */);
7143 return result;
7146 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7147 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7149 uint32x4_t result;
7150 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
7151 : "=w"(result)
7152 : "0"(a), "w"(b), "x"(c)
7153 : /* No clobbers */);
7154 return result;
7157 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7158 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7160 uint64x2_t result;
7161 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
7162 : "=w"(result)
7163 : "0"(a), "w"(b), "w"(c)
7164 : /* No clobbers */);
7165 return result;
7168 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7169 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7171 int16x8_t result;
7172 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
7173 : "=w"(result)
7174 : "0"(a), "w"(b), "w"(c)
7175 : /* No clobbers */);
7176 return result;
7179 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7180 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7182 int32x4_t result;
7183 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
7184 : "=w"(result)
7185 : "0"(a), "w"(b), "w"(c)
7186 : /* No clobbers */);
7187 return result;
7190 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7191 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7193 int64x2_t result;
7194 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
7195 : "=w"(result)
7196 : "0"(a), "w"(b), "w"(c)
7197 : /* No clobbers */);
7198 return result;
7201 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7202 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7204 uint16x8_t result;
7205 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
7206 : "=w"(result)
7207 : "0"(a), "w"(b), "w"(c)
7208 : /* No clobbers */);
7209 return result;
7212 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7213 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7215 uint32x4_t result;
7216 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
7217 : "=w"(result)
7218 : "0"(a), "w"(b), "w"(c)
7219 : /* No clobbers */);
7220 return result;
7223 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7224 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7226 uint64x2_t result;
7227 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
7228 : "=w"(result)
7229 : "0"(a), "w"(b), "w"(c)
7230 : /* No clobbers */);
7231 return result;
7234 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7235 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7237 float32x4_t result;
7238 float32x4_t t1;
7239 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
7240 : "=w"(result), "=w"(t1)
7241 : "0"(a), "w"(b), "w"(c)
7242 : /* No clobbers */);
7243 return result;
7246 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7247 vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7249 float64x2_t result;
7250 float64x2_t t1;
7251 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
7252 : "=w"(result), "=w"(t1)
7253 : "0"(a), "w"(b), "w"(c)
7254 : /* No clobbers */);
7255 return result;
7258 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7259 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7261 int16x8_t result;
7262 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7263 : "=w"(result)
7264 : "0"(a), "w"(b), "x"(c)
7265 : /* No clobbers */);
7266 return result;
7269 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7270 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7272 int32x4_t result;
7273 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7274 : "=w"(result)
7275 : "0"(a), "w"(b), "w"(c)
7276 : /* No clobbers */);
7277 return result;
7280 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7281 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7283 uint16x8_t result;
7284 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7285 : "=w"(result)
7286 : "0"(a), "w"(b), "x"(c)
7287 : /* No clobbers */);
7288 return result;
7291 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7292 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7294 uint32x4_t result;
7295 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7296 : "=w"(result)
7297 : "0"(a), "w"(b), "w"(c)
7298 : /* No clobbers */);
7299 return result;
7302 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7303 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7305 int8x16_t result;
7306 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7307 : "=w"(result)
7308 : "0"(a), "w"(b), "w"(c)
7309 : /* No clobbers */);
7310 return result;
7313 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7314 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7316 int16x8_t result;
7317 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7318 : "=w"(result)
7319 : "0"(a), "w"(b), "w"(c)
7320 : /* No clobbers */);
7321 return result;
7324 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7325 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7327 int32x4_t result;
7328 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7329 : "=w"(result)
7330 : "0"(a), "w"(b), "w"(c)
7331 : /* No clobbers */);
7332 return result;
7335 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7336 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7338 uint8x16_t result;
7339 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7340 : "=w"(result)
7341 : "0"(a), "w"(b), "w"(c)
7342 : /* No clobbers */);
7343 return result;
7346 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7347 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7349 uint16x8_t result;
7350 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7351 : "=w"(result)
7352 : "0"(a), "w"(b), "w"(c)
7353 : /* No clobbers */);
7354 return result;
7357 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7358 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7360 uint32x4_t result;
7361 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7362 : "=w"(result)
7363 : "0"(a), "w"(b), "w"(c)
7364 : /* No clobbers */);
7365 return result;
7368 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7369 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7371 float32x2_t result;
7372 float32x2_t t1;
7373 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
7374 : "=w"(result), "=w"(t1)
7375 : "0"(a), "w"(b), "w"(c)
7376 : /* No clobbers */);
7377 return result;
7380 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7381 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7383 int16x4_t result;
7384 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7385 : "=w"(result)
7386 : "0"(a), "w"(b), "x"(c)
7387 : /* No clobbers */);
7388 return result;
7391 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7392 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7394 int32x2_t result;
7395 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7396 : "=w"(result)
7397 : "0"(a), "w"(b), "w"(c)
7398 : /* No clobbers */);
7399 return result;
7402 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7403 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7405 uint16x4_t result;
7406 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7407 : "=w"(result)
7408 : "0"(a), "w"(b), "x"(c)
7409 : /* No clobbers */);
7410 return result;
7413 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7414 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7416 uint32x2_t result;
7417 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7418 : "=w"(result)
7419 : "0"(a), "w"(b), "w"(c)
7420 : /* No clobbers */);
7421 return result;
7424 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7425 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7427 int8x8_t result;
7428 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7429 : "=w"(result)
7430 : "0"(a), "w"(b), "w"(c)
7431 : /* No clobbers */);
7432 return result;
7435 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7436 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7438 int16x4_t result;
7439 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7440 : "=w"(result)
7441 : "0"(a), "w"(b), "w"(c)
7442 : /* No clobbers */);
7443 return result;
7446 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7447 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7449 int32x2_t result;
7450 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7451 : "=w"(result)
7452 : "0"(a), "w"(b), "w"(c)
7453 : /* No clobbers */);
7454 return result;
7457 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7458 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7460 uint8x8_t result;
7461 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7462 : "=w"(result)
7463 : "0"(a), "w"(b), "w"(c)
7464 : /* No clobbers */);
7465 return result;
7468 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7469 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7471 uint16x4_t result;
7472 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7473 : "=w"(result)
7474 : "0"(a), "w"(b), "w"(c)
7475 : /* No clobbers */);
7476 return result;
7479 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7480 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7482 uint32x2_t result;
7483 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7484 : "=w"(result)
7485 : "0"(a), "w"(b), "w"(c)
7486 : /* No clobbers */);
7487 return result;
/* Widening multiply-subtract of the high vector halves by one lane:
   a - (high(b) * c[d]).  Macros so the lane number D reaches the "i"
   immediate constraint; "x" restricts 16-bit element operands to V0-V15
   as required by .h[n] lane addressing.  */

#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* As vmlsl_high_lane_* but the lane source C is a 128-bit ("q") vector.  */

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7602 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7603 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7605 int32x4_t result;
7606 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
7607 : "=w"(result)
7608 : "0"(a), "w"(b), "x"(c)
7609 : /* No clobbers */);
7610 return result;
7613 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7614 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7616 int64x2_t result;
7617 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
7618 : "=w"(result)
7619 : "0"(a), "w"(b), "w"(c)
7620 : /* No clobbers */);
7621 return result;
7624 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7625 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7627 uint32x4_t result;
7628 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
7629 : "=w"(result)
7630 : "0"(a), "w"(b), "x"(c)
7631 : /* No clobbers */);
7632 return result;
7635 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7636 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7638 uint64x2_t result;
7639 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
7640 : "=w"(result)
7641 : "0"(a), "w"(b), "w"(c)
7642 : /* No clobbers */);
7643 return result;
7646 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7647 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7649 int16x8_t result;
7650 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
7651 : "=w"(result)
7652 : "0"(a), "w"(b), "w"(c)
7653 : /* No clobbers */);
7654 return result;
7657 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7658 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7660 int32x4_t result;
7661 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
7662 : "=w"(result)
7663 : "0"(a), "w"(b), "w"(c)
7664 : /* No clobbers */);
7665 return result;
7668 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7669 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7671 int64x2_t result;
7672 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
7673 : "=w"(result)
7674 : "0"(a), "w"(b), "w"(c)
7675 : /* No clobbers */);
7676 return result;
7679 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7680 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7682 uint16x8_t result;
7683 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
7684 : "=w"(result)
7685 : "0"(a), "w"(b), "w"(c)
7686 : /* No clobbers */);
7687 return result;
7690 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7691 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7693 uint32x4_t result;
7694 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
7695 : "=w"(result)
7696 : "0"(a), "w"(b), "w"(c)
7697 : /* No clobbers */);
7698 return result;
7701 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7702 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7704 uint64x2_t result;
7705 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
7706 : "=w"(result)
7707 : "0"(a), "w"(b), "w"(c)
7708 : /* No clobbers */);
7709 return result;
/* Widening multiply-subtract by one lane of a 64-bit vector:
   a - (b * c[d]).  Macro form so D reaches the "i" immediate constraint.  */

#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* As vmlsl_lane_* but the lane source C is a 128-bit ("q") vector.  */

#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7824 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7825 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7827 int32x4_t result;
7828 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7829 : "=w"(result)
7830 : "0"(a), "w"(b), "x"(c)
7831 : /* No clobbers */);
7832 return result;
7835 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7836 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7838 int64x2_t result;
7839 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7840 : "=w"(result)
7841 : "0"(a), "w"(b), "w"(c)
7842 : /* No clobbers */);
7843 return result;
7846 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7847 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7849 uint32x4_t result;
7850 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7851 : "=w"(result)
7852 : "0"(a), "w"(b), "x"(c)
7853 : /* No clobbers */);
7854 return result;
7857 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7858 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7860 uint64x2_t result;
7861 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7862 : "=w"(result)
7863 : "0"(a), "w"(b), "w"(c)
7864 : /* No clobbers */);
7865 return result;
7868 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7869 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7871 int16x8_t result;
7872 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7873 : "=w"(result)
7874 : "0"(a), "w"(b), "w"(c)
7875 : /* No clobbers */);
7876 return result;
7879 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7880 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7882 int32x4_t result;
7883 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7884 : "=w"(result)
7885 : "0"(a), "w"(b), "w"(c)
7886 : /* No clobbers */);
7887 return result;
7890 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7891 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7893 int64x2_t result;
7894 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7895 : "=w"(result)
7896 : "0"(a), "w"(b), "w"(c)
7897 : /* No clobbers */);
7898 return result;
7901 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7902 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7904 uint16x8_t result;
7905 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7906 : "=w"(result)
7907 : "0"(a), "w"(b), "w"(c)
7908 : /* No clobbers */);
7909 return result;
7912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7913 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7915 uint32x4_t result;
7916 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7917 : "=w"(result)
7918 : "0"(a), "w"(b), "w"(c)
7919 : /* No clobbers */);
7920 return result;
7923 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7924 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7926 uint64x2_t result;
7927 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7928 : "=w"(result)
7929 : "0"(a), "w"(b), "w"(c)
7930 : /* No clobbers */);
7931 return result;
7934 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7935 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7937 float32x4_t result;
7938 float32x4_t t1;
7939 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7940 : "=w"(result), "=w"(t1)
7941 : "0"(a), "w"(b), "w"(c)
7942 : /* No clobbers */);
7943 return result;
7946 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7947 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7949 float64x2_t result;
7950 float64x2_t t1;
7951 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
7952 : "=w"(result), "=w"(t1)
7953 : "0"(a), "w"(b), "x"(c)
7954 : /* No clobbers */);
7955 return result;
7958 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7959 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7961 int16x8_t result;
7962 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7963 : "=w"(result)
7964 : "0"(a), "w"(b), "x"(c)
7965 : /* No clobbers */);
7966 return result;
7969 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7970 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7972 int32x4_t result;
7973 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7974 : "=w"(result)
7975 : "0"(a), "w"(b), "w"(c)
7976 : /* No clobbers */);
7977 return result;
7980 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7981 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7983 uint16x8_t result;
7984 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7985 : "=w"(result)
7986 : "0"(a), "w"(b), "x"(c)
7987 : /* No clobbers */);
7988 return result;
7991 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7992 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7994 uint32x4_t result;
7995 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7996 : "=w"(result)
7997 : "0"(a), "w"(b), "w"(c)
7998 : /* No clobbers */);
7999 return result;
8002 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8003 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8005 int8x16_t result;
8006 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8007 : "=w"(result)
8008 : "0"(a), "w"(b), "w"(c)
8009 : /* No clobbers */);
8010 return result;
8013 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8014 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8016 int16x8_t result;
8017 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8018 : "=w"(result)
8019 : "0"(a), "w"(b), "w"(c)
8020 : /* No clobbers */);
8021 return result;
8024 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8025 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8027 int32x4_t result;
8028 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8029 : "=w"(result)
8030 : "0"(a), "w"(b), "w"(c)
8031 : /* No clobbers */);
8032 return result;
8035 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8036 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8038 uint8x16_t result;
8039 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8040 : "=w"(result)
8041 : "0"(a), "w"(b), "w"(c)
8042 : /* No clobbers */);
8043 return result;
8046 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8047 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8049 uint16x8_t result;
8050 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8051 : "=w"(result)
8052 : "0"(a), "w"(b), "w"(c)
8053 : /* No clobbers */);
8054 return result;
8057 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8058 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8060 uint32x4_t result;
8061 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8062 : "=w"(result)
8063 : "0"(a), "w"(b), "w"(c)
8064 : /* No clobbers */);
8065 return result;
8068 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8069 vmovl_high_s8 (int8x16_t a)
8071 int16x8_t result;
8072 __asm__ ("sshll2 %0.8h,%1.16b,#0"
8073 : "=w"(result)
8074 : "w"(a)
8075 : /* No clobbers */);
8076 return result;
8079 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8080 vmovl_high_s16 (int16x8_t a)
8082 int32x4_t result;
8083 __asm__ ("sshll2 %0.4s,%1.8h,#0"
8084 : "=w"(result)
8085 : "w"(a)
8086 : /* No clobbers */);
8087 return result;
8090 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8091 vmovl_high_s32 (int32x4_t a)
8093 int64x2_t result;
8094 __asm__ ("sshll2 %0.2d,%1.4s,#0"
8095 : "=w"(result)
8096 : "w"(a)
8097 : /* No clobbers */);
8098 return result;
8101 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8102 vmovl_high_u8 (uint8x16_t a)
8104 uint16x8_t result;
8105 __asm__ ("ushll2 %0.8h,%1.16b,#0"
8106 : "=w"(result)
8107 : "w"(a)
8108 : /* No clobbers */);
8109 return result;
8112 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8113 vmovl_high_u16 (uint16x8_t a)
8115 uint32x4_t result;
8116 __asm__ ("ushll2 %0.4s,%1.8h,#0"
8117 : "=w"(result)
8118 : "w"(a)
8119 : /* No clobbers */);
8120 return result;
8123 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8124 vmovl_high_u32 (uint32x4_t a)
8126 uint64x2_t result;
8127 __asm__ ("ushll2 %0.2d,%1.4s,#0"
8128 : "=w"(result)
8129 : "w"(a)
8130 : /* No clobbers */);
8131 return result;
8134 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8135 vmovl_s8 (int8x8_t a)
8137 int16x8_t result;
8138 __asm__ ("sshll %0.8h,%1.8b,#0"
8139 : "=w"(result)
8140 : "w"(a)
8141 : /* No clobbers */);
8142 return result;
8145 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8146 vmovl_s16 (int16x4_t a)
8148 int32x4_t result;
8149 __asm__ ("sshll %0.4s,%1.4h,#0"
8150 : "=w"(result)
8151 : "w"(a)
8152 : /* No clobbers */);
8153 return result;
8156 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8157 vmovl_s32 (int32x2_t a)
8159 int64x2_t result;
8160 __asm__ ("sshll %0.2d,%1.2s,#0"
8161 : "=w"(result)
8162 : "w"(a)
8163 : /* No clobbers */);
8164 return result;
8167 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8168 vmovl_u8 (uint8x8_t a)
8170 uint16x8_t result;
8171 __asm__ ("ushll %0.8h,%1.8b,#0"
8172 : "=w"(result)
8173 : "w"(a)
8174 : /* No clobbers */);
8175 return result;
8178 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8179 vmovl_u16 (uint16x4_t a)
8181 uint32x4_t result;
8182 __asm__ ("ushll %0.4s,%1.4h,#0"
8183 : "=w"(result)
8184 : "w"(a)
8185 : /* No clobbers */);
8186 return result;
8189 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8190 vmovl_u32 (uint32x2_t a)
8192 uint64x2_t result;
8193 __asm__ ("ushll %0.2d,%1.2s,#0"
8194 : "=w"(result)
8195 : "w"(a)
8196 : /* No clobbers */);
8197 return result;
/* vmovn_high_*: narrow every lane of the 128-bit vector B to half
   width and deposit the results in the HIGH half of the return value;
   the low half is the caller-supplied A.  The result variable is
   pre-seeded with vcombine (a, 0) and passed to the asm as a
   read-write operand ("+w"): XTN2 writes only the upper half of its
   destination register, so A survives in the lower half.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  /* Seed the low 64 bits with A; the zero upper half is overwritten
     by XTN2 below.  */
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmovn_*: narrow ("move narrow") every lane of a 128-bit vector to
   half width, truncating, producing a 64-bit result (XTN).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmovn_s16 (int16x8_t a)
{
  int8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmovn_s32 (int32x4_t a)
{
  int16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmovn_s64 (int64x2_t a)
{
  int32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmovn_u16 (uint16x8_t a)
{
  uint8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmovn_u32 (uint32x4_t a)
{
  uint16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmovn_u64 (uint64x2_t a)
{
  uint32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vmul_n_*: multiply every lane of A by the scalar B, which is
   broadcast via the by-element form of FMUL/MUL (lane 0 of the
   register holding B).
   NOTE: the 16-bit integer variants constrain B with "x" instead of
   "w" -- the .h[lane] by-element encodings can only name Vm in
   registers v0-v15, and "x" is GCC's name for that restricted SIMD
   register class.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t a, float32_t b)
{
  float32x2_t result;
  __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)   /* "x": v0-v15 only, see note above.  */
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_n_u16 (uint16x4_t a, uint16_t b)
{
  uint16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_n_u32 (uint32x2_t a, uint32_t b)
{
  uint32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmuld_lane_f64: scalar double multiplied by lane C of vector B,
   via the by-element form of scalar FMUL.  Implemented as a macro
   (statement expression) because the lane index must be an
   immediate ("i" constraint).
   NOTE(review): ACLE declares vmuld_lane_f64's vector argument as
   float64x1_t; this definition takes float64x2_t (the laneq shape) --
   confirm against the ACLE specification before changing.  */
#define vmuld_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmul %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_lane_*: widening multiply of the high half of A by a
   single lane of B (SMULL2/UMULL2 by-element).
   Conformance fix: per the ARM C Language Extensions, the vector-lane
   argument of the non-"laneq" vmull_high_lane intrinsics is a 64-bit
   vector (int16x4_t, int32x2_t, ...), not a 128-bit one.  The previous
   definitions declared b_ with the 128-bit type, so ACLE-conforming
   calls failed to compile.  The emitted instruction is unchanged: the
   by-element form only needs B's register number and the lane index.
   The "x" constraint on the 16-bit B restricts it to v0-v15, as the
   .h[lane] encodings require.  */
#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_laneq_*: widening multiply of the high half of A by a
   single lane of the 128-bit ("q") vector B (SMULL2/UMULL2
   by-element).  The 128-bit type of b_ is correct here: "laneq"
   intrinsics index into a full quad-word vector.  The "x" constraint
   on the 16-bit B restricts it to v0-v15, as the .h[lane] encodings
   require.  */
#define vmull_high_laneq_s16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_n_*: widening multiply of the high half of A by the
   scalar B, broadcast via lane 0 of the register holding B
   (SMULL2/UMULL2 by-element).  The "x" constraint on 16-bit B keeps
   it in v0-v15 as required by the .h[0] encoding.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_n_s16 (int16x8_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_n_s32 (int32x4_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_n_u16 (uint16x8_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_n_u32 (uint32x4_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmull_high_*: lane-wise widening multiply of the HIGH halves of two
   128-bit vectors, producing a 128-bit vector of double-width lanes.
   PMULL2 for polynomial, SMULL2 for signed, UMULL2 for unsigned.  */

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_high_p8 (poly8x16_t a, poly8x16_t b)
{
  poly16x8_t result;
  __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_high_s8 (int8x16_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_s16 (int16x8_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_s32 (int32x4_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_high_u8 (uint8x16_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_u16 (uint16x8_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_u32 (uint32x4_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmull_lane_*: widening multiply of the 64-bit vector A by lane C of
   the 64-bit vector B (SMULL/UMULL by-element).  Macros so that the
   lane index can satisfy the "i" (immediate) constraint.  The "x"
   constraint on the 16-bit B keeps it in v0-v15, as the .h[lane]
   encodings require.  */
#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_laneq_*: widening multiply of the 64-bit vector A by lane C
   of the 128-bit ("q") vector B (SMULL/UMULL by-element).  The "x"
   constraint on the 16-bit B keeps it in v0-v15, as the .h[lane]
   encodings require.  */
#define vmull_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_n_*: widening multiply of the 64-bit vector A by the scalar
   B, broadcast via lane 0 of the register holding B (SMULL/UMULL
   by-element).  "x" on 16-bit B: v0-v15 only, per the .h[0]
   encoding.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_n_s16 (int16x4_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_n_s32 (int32x2_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_n_u16 (uint16x4_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_n_u32 (uint32x2_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmull_*: lane-wise widening multiply of two 64-bit vectors,
   producing a 128-bit vector of double-width lanes.  PMULL for
   polynomial, SMULL for signed, UMULL for unsigned.  */

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_p8 (poly8x8_t a, poly8x8_t b)
{
  poly16x8_t result;
  __asm__ ("pmull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_s8 (int8x8_t a, int8x8_t b)
{
  int16x8_t result;
  __asm__ ("smull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_s16 (int16x4_t a, int16x4_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_s32 (int32x2_t a, int32x2_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_u8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t result;
  __asm__ ("umull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_u16 (uint16x4_t a, uint16x4_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_u32 (uint32x2_t a, uint32x2_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmulq_n_*: multiply every lane of the 128-bit vector A by the
   scalar B, broadcast via lane 0 of the register holding B.  "x" on
   the 16-bit B: v0-v15 only, per the .h[0] by-element encoding.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32 (float32x4_t a, float32_t b)
{
  float32x4_t result;
  __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_n_f64 (float64x2_t a, float64_t b)
{
  float64x2_t result;
  __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_n_u16 (uint16x8_t a, uint16_t b)
{
  uint16x8_t result;
  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_n_u32 (uint32x4_t a, uint32_t b)
{
  uint32x4_t result;
  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmuls_lane_f32: scalar float multiplied by lane C of vector B,
   via the by-element form of scalar FMUL.
   NOTE(review): ACLE declares vmuls_lane_f32's vector argument as
   float32x2_t; this definition takes float32x4_t (the laneq shape) --
   confirm against the ACLE specification before changing.  */
#define vmuls_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fmul %s0,%s1,%2.s[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmulx family: FMULX multiply -- like FMUL except that
   infinity * 0.0 yields +/-2.0 instead of NaN, as defined by the
   AArch64 FMULX instruction.  Vector, scalar, and by-lane forms.
   NOTE(review): the lane macros below take 128-bit vector arguments;
   ACLE declares the non-"laneq" lane forms with narrower vector
   types (vmulx_lane_f32: float32x2_t, vmulxq_lane_f64: float64x1_t)
   -- confirm against the ACLE specification before changing.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmulx_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

#define vmulx_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmulxd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("fmulx %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulxq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulxq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

#define vmulxq_lane_f32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulxq_lane_f64(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmulxs_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("fmulx %s0, %s1, %s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmvn_*: bitwise NOT of every element of a 64-bit vector.  MVN
   operates on bytes; since complement is element-size-agnostic, all
   variants use the .8b arrangement regardless of element width.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmvn_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmvn_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmvn_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmvn_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmvn_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmvn_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vmvnq_*: bitwise NOT of every element of a 128-bit vector; the
   .16b arrangement is used for all element widths (complement is
   element-size-agnostic).  */

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmvnq_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmvnq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmvnq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmvnq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmvnq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmvnq_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmvnq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpadal_* / vpadalq_*: pairwise add adjacent lanes of B, widening
   to double-width lanes, and ACCUMULATE into A (SADALP/UADALP).
   SADALP/UADALP read and write their destination register, so the
   accumulator A is passed with GCC's matching constraint "0",
   forcing it into the same register as the output operand; the asm
   template therefore only names %0 and %2.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadal_s8 (int16x4_t a, int8x8_t b)
{
  int16x4_t result;
  __asm__ ("sadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)   /* "0": A shares the result register.  */
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadal_s16 (int32x2_t a, int16x4_t b)
{
  int32x2_t result;
  __asm__ ("sadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpadal_s32 (int64x1_t a, int32x2_t b)
{
  int64x1_t result;
  __asm__ ("sadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadal_u8 (uint16x4_t a, uint8x8_t b)
{
  uint16x4_t result;
  __asm__ ("uadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadal_u16 (uint32x2_t a, uint16x4_t b)
{
  uint32x2_t result;
  __asm__ ("uadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpadal_u32 (uint64x1_t a, uint32x2_t b)
{
  uint64x1_t result;
  __asm__ ("uadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpadalq_s8 (int16x8_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpadalq_s16 (int32x4_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpadalq_s32 (int64x2_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpadalq_u8 (uint16x8_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpadalq_u16 (uint32x4_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpadalq_u32 (uint64x2_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpadd_*: pairwise add of two 64-bit vectors -- the result holds the
   sums of adjacent lane pairs of A in its low half and of B in its
   high half.  The float variant uses FADDP via inline asm; the
   integer variants call the addp builtin, with casts for the unsigned
   types (ADDP is sign-agnostic, so the bit pattern is identical).
   vpaddd_f64 is the scalar horizontal add of a 2-lane double
   vector.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpadd_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("faddp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_addpv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_addpv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_addpv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  /* Reuse the signed builtin; pairwise integer add is the same
     operation for either signedness.  */
  return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
                                                 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
                                                  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
                                                  (int32x2_t) __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpaddd_f64 (float64x2_t a)
{
  /* Scalar FADDP: add the two double lanes of A into a single
     scalar result.  */
  float64_t result;
  __asm__ ("faddp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpaddl_*: pairwise add-long -- sum adjacent lane pairs of a 64-bit
   vector into double-width lanes (SADDLP signed, UADDLP unsigned),
   halving the lane count.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpaddl_s8 (int8x8_t a)
{
  int16x4_t result;
  __asm__ ("saddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpaddl_s16 (int16x4_t a)
{
  int32x2_t result;
  __asm__ ("saddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddl_s32 (int32x2_t a)
{
  int64x1_t result;
  __asm__ ("saddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpaddl_u8 (uint8x8_t a)
{
  uint16x4_t result;
  __asm__ ("uaddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpaddl_u16 (uint16x4_t a)
{
  uint32x2_t result;
  __asm__ ("uaddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpaddl_u32 (uint32x2_t a)
{
  uint64x1_t result;
  __asm__ ("uaddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpaddlq_*: pairwise add-long on 128-bit vectors -- sum adjacent
   lane pairs into double-width lanes (SADDLP/UADDLP), halving the
   lane count.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddlq_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("saddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddlq_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("saddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddlq_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("saddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddlq_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("uaddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddlq_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("uaddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddlq_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("uaddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpaddq_*: pairwise add of two 128-bit vectors (FADDP for float,
   ADDP for integer) -- the result holds the sums of adjacent lane
   pairs of A in its low half and of B in its high half.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpaddq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("faddp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpaddq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("faddp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpaddq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddq_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpaddq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
9602 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9603 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
9605 uint64x2_t result;
9606 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9607 : "=w"(result)
9608 : "w"(a), "w"(b)
9609 : /* No clobbers */);
9610 return result;
/* FADDP (scalar): add the two single-precision lanes of A, returning
   the scalar sum.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpadds_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("faddp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Pairwise maximum, 64-bit forms: lane i of the result is the larger
   of an adjacent lane pair drawn from the concatenation of A and B
   ([F|S|U]MAXP).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmax_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmax_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmax_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmax_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Unsigned comparisons use UMAXP.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmax_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmax_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmax_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Pairwise maxNum (FMAXNMP): like FMAXP but with IEEE 754-2008
   maxNum semantics — when exactly one element of a pair is a quiet
   NaN, the other (numeric) element is returned.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmaxnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: maxNum of the two double lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: maxNum of the two float lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Pairwise maximum, 128-bit forms ([F|S|U]MAXP).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpmaxq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpmaxq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpmaxq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* FMAXP (scalar): maximum of the two double lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* FMAXP (scalar): maximum of the two float lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxs_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Pairwise minimum, 64-bit forms ([F|S|U]MINP); mirrors the vpmax_*
   family above.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmin_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmin_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmin_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmin_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmin_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmin_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmin_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Pairwise minNum (FMINNMP): IEEE 754-2008 minNum semantics — a pair
   with exactly one quiet NaN yields the numeric element.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpminnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: minNum of the two double lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* Scalar reduction: minNum of the two float lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpminnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Pairwise minimum, 128-bit forms ([F|S|U]MINP).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpminq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpminq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpminq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpminq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpminq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpminq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* FMINP (scalar): minimum of the two double lanes of A.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* FMINP (scalar): minimum of the two float lanes of A.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmins_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
10108 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10109 vqdmulh_n_s16 (int16x4_t a, int16_t b)
10111 int16x4_t result;
10112 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
10113 : "=w"(result)
10114 : "w"(a), "w"(b)
10115 : /* No clobbers */);
10116 return result;
/* SQDMULH (by element): signed saturating doubling multiply of each
   32-bit lane of A by the scalar B, returning the high half of each
   product.  The 32-bit element index can address all of V0-V31, so
   the plain "w" constraint is correct here.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
10130 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10131 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
10133 int16x8_t result;
10134 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
10135 : "=w"(result)
10136 : "w"(a), "w"(b)
10137 : /* No clobbers */);
10138 return result;
/* SQDMULH (by element), 128-bit form: signed saturating doubling
   multiply of each 32-bit lane of A by the scalar B, high half of
   each product.  32-bit element indices may use any of V0-V31, so
   "w" is the right constraint.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Saturating extract-narrow into the high half ([S|U]QXTN2 /
   SQXTUN2): the low half of the result is A, the high half is the
   saturated narrowing of B.  RESULT is pre-built with vcombine so the
   "+w" (read-write) constraint tells the compiler the low half is
   live before the asm writes the high half.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

/* SQXTUN2: signed input narrowed with unsigned saturation (negative
   lanes clamp to zero) into the high half.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
/* SQRDMULH (by element): signed saturating rounding doubling multiply
   of each lane of A by the scalar B, returning the high half.  The
   16-bit by-element encodings can only name V0-V15, hence the "x"
   constraint on B in the s16 variants; 32-bit indices can use any
   register, hence "w" in the s32 variants.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Saturating shift-right-narrow into the high half.  These must be
   macros (not inline functions) because the shift amount C is encoded
   as an immediate ("i" constraint) in the instruction.  Each expands
   to a statement expression that seeds RESULT with A in the low half
   (via vcombine/vcreate) and lets the SQ/UQ...2 instruction fill the
   high half, using "+w" so the compiler keeps the low half live.
   Naming:  [u]q  = saturating (unsigned/signed),
            r     = rounding,
            un    = signed input, unsigned saturation.  */

/* SQRSHRN2: signed saturating rounded shift right narrow (high).  */
#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* UQRSHRN2: unsigned saturating rounded shift right narrow (high).  */
#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* SQRSHRUN2: signed input, rounded, unsigned saturation (high).  */
#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* SQSHRN2: signed saturating (truncating) shift right narrow (high).  */
#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* UQSHRN2: unsigned saturating shift right narrow (high).  */
#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* SQSHRUN2: signed input, unsigned saturation, truncating (high).  */
#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* RBIT: reverse the bit order within each byte lane.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrbit_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rbit %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrbit_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rbit %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrbitq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rbit %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrbitq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rbit %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* URECPE: unsigned reciprocal estimate of each 32-bit lane.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("urecpe %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrecpeq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("urecpe %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* REV16: reverse the order of 8-bit elements within each 16-bit
   half-word (i.e. byte-swap each 16-bit unit).  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev16_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev16_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev16_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rev16 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev16q_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("rev16 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev16q_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rev16 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev16q_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rev16 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* REV32: reverse the order of 8- or 16-bit elements within each
   32-bit word.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev32_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("rev32 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vrev32_p16 (poly16x4_t a)
{
  poly16x4_t result;
  __asm__ ("rev32 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev32_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("rev32 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrev32_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("rev32 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrev32_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("rev32 %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrev32_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("rev32 %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrev32q_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("rev32 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vrev32q_p16 (poly16x8_t a)
{
  poly16x8_t result;
  __asm__ ("rev32 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrev32q_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("rev32 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrev32q_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("rev32 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrev32q_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("rev32 %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrev32q_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("rev32 %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
10829 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10830 vrev64_f32 (float32x2_t a)
10832 float32x2_t result;
10833 __asm__ ("rev64 %0.2s,%1.2s"
10834 : "=w"(result)
10835 : "w"(a)
10836 : /* No clobbers */);
10837 return result;
10840 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10841 vrev64_p8 (poly8x8_t a)
10843 poly8x8_t result;
10844 __asm__ ("rev64 %0.8b,%1.8b"
10845 : "=w"(result)
10846 : "w"(a)
10847 : /* No clobbers */);
10848 return result;
10851 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
10852 vrev64_p16 (poly16x4_t a)
10854 poly16x4_t result;
10855 __asm__ ("rev64 %0.4h,%1.4h"
10856 : "=w"(result)
10857 : "w"(a)
10858 : /* No clobbers */);
10859 return result;
10862 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10863 vrev64_s8 (int8x8_t a)
10865 int8x8_t result;
10866 __asm__ ("rev64 %0.8b,%1.8b"
10867 : "=w"(result)
10868 : "w"(a)
10869 : /* No clobbers */);
10870 return result;
10873 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10874 vrev64_s16 (int16x4_t a)
10876 int16x4_t result;
10877 __asm__ ("rev64 %0.4h,%1.4h"
10878 : "=w"(result)
10879 : "w"(a)
10880 : /* No clobbers */);
10881 return result;
10884 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10885 vrev64_s32 (int32x2_t a)
10887 int32x2_t result;
10888 __asm__ ("rev64 %0.2s,%1.2s"
10889 : "=w"(result)
10890 : "w"(a)
10891 : /* No clobbers */);
10892 return result;
10895 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10896 vrev64_u8 (uint8x8_t a)
10898 uint8x8_t result;
10899 __asm__ ("rev64 %0.8b,%1.8b"
10900 : "=w"(result)
10901 : "w"(a)
10902 : /* No clobbers */);
10903 return result;
10906 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10907 vrev64_u16 (uint16x4_t a)
10909 uint16x4_t result;
10910 __asm__ ("rev64 %0.4h,%1.4h"
10911 : "=w"(result)
10912 : "w"(a)
10913 : /* No clobbers */);
10914 return result;
10917 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10918 vrev64_u32 (uint32x2_t a)
10920 uint32x2_t result;
10921 __asm__ ("rev64 %0.2s,%1.2s"
10922 : "=w"(result)
10923 : "w"(a)
10924 : /* No clobbers */);
10925 return result;
10928 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10929 vrev64q_f32 (float32x4_t a)
10931 float32x4_t result;
10932 __asm__ ("rev64 %0.4s,%1.4s"
10933 : "=w"(result)
10934 : "w"(a)
10935 : /* No clobbers */);
10936 return result;
10939 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10940 vrev64q_p8 (poly8x16_t a)
10942 poly8x16_t result;
10943 __asm__ ("rev64 %0.16b,%1.16b"
10944 : "=w"(result)
10945 : "w"(a)
10946 : /* No clobbers */);
10947 return result;
10950 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10951 vrev64q_p16 (poly16x8_t a)
10953 poly16x8_t result;
10954 __asm__ ("rev64 %0.8h,%1.8h"
10955 : "=w"(result)
10956 : "w"(a)
10957 : /* No clobbers */);
10958 return result;
10961 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10962 vrev64q_s8 (int8x16_t a)
10964 int8x16_t result;
10965 __asm__ ("rev64 %0.16b,%1.16b"
10966 : "=w"(result)
10967 : "w"(a)
10968 : /* No clobbers */);
10969 return result;
10972 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10973 vrev64q_s16 (int16x8_t a)
10975 int16x8_t result;
10976 __asm__ ("rev64 %0.8h,%1.8h"
10977 : "=w"(result)
10978 : "w"(a)
10979 : /* No clobbers */);
10980 return result;
10983 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10984 vrev64q_s32 (int32x4_t a)
10986 int32x4_t result;
10987 __asm__ ("rev64 %0.4s,%1.4s"
10988 : "=w"(result)
10989 : "w"(a)
10990 : /* No clobbers */);
10991 return result;
10994 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10995 vrev64q_u8 (uint8x16_t a)
10997 uint8x16_t result;
10998 __asm__ ("rev64 %0.16b,%1.16b"
10999 : "=w"(result)
11000 : "w"(a)
11001 : /* No clobbers */);
11002 return result;
11005 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11006 vrev64q_u16 (uint16x8_t a)
11008 uint16x8_t result;
11009 __asm__ ("rev64 %0.8h,%1.8h"
11010 : "=w"(result)
11011 : "w"(a)
11012 : /* No clobbers */);
11013 return result;
11016 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11017 vrev64q_u32 (uint32x4_t a)
11019 uint32x4_t result;
11020 __asm__ ("rev64 %0.4s,%1.4s"
11021 : "=w"(result)
11022 : "w"(a)
11023 : /* No clobbers */);
11024 return result;
/* vrshrn_high_n family: rounding shift-right-narrow by immediate C of
   the wide vector B, writing the narrowed result into the HIGH half of
   the return value (RSHRN2); the low half is the existing narrow
   vector A.  Statement-expression macros because C must be an
   assemble-time immediate.  "+w" ties the combined value through the
   instruction.  */

#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vrshrn_n family: rounding shift-right-narrow by immediate B
   (RSHRN), halving the element width.  Macros so B stays an
   assemble-time immediate.  */

#define vrshrn_n_s16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
11189 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11190 vrsqrte_f32 (float32x2_t a)
11192 float32x2_t result;
11193 __asm__ ("frsqrte %0.2s,%1.2s"
11194 : "=w"(result)
11195 : "w"(a)
11196 : /* No clobbers */);
11197 return result;
11200 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
11201 vrsqrte_f64 (float64x1_t a)
11203 float64x1_t result;
11204 __asm__ ("frsqrte %d0,%d1"
11205 : "=w"(result)
11206 : "w"(a)
11207 : /* No clobbers */);
11208 return result;
11211 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11212 vrsqrte_u32 (uint32x2_t a)
11214 uint32x2_t result;
11215 __asm__ ("ursqrte %0.2s,%1.2s"
11216 : "=w"(result)
11217 : "w"(a)
11218 : /* No clobbers */);
11219 return result;
11222 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11223 vrsqrted_f64 (float64_t a)
11225 float64_t result;
11226 __asm__ ("frsqrte %d0,%d1"
11227 : "=w"(result)
11228 : "w"(a)
11229 : /* No clobbers */);
11230 return result;
11233 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11234 vrsqrteq_f32 (float32x4_t a)
11236 float32x4_t result;
11237 __asm__ ("frsqrte %0.4s,%1.4s"
11238 : "=w"(result)
11239 : "w"(a)
11240 : /* No clobbers */);
11241 return result;
11244 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11245 vrsqrteq_f64 (float64x2_t a)
11247 float64x2_t result;
11248 __asm__ ("frsqrte %0.2d,%1.2d"
11249 : "=w"(result)
11250 : "w"(a)
11251 : /* No clobbers */);
11252 return result;
11255 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11256 vrsqrteq_u32 (uint32x4_t a)
11258 uint32x4_t result;
11259 __asm__ ("ursqrte %0.4s,%1.4s"
11260 : "=w"(result)
11261 : "w"(a)
11262 : /* No clobbers */);
11263 return result;
11266 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11267 vrsqrtes_f32 (float32_t a)
11269 float32_t result;
11270 __asm__ ("frsqrte %s0,%s1"
11271 : "=w"(result)
11272 : "w"(a)
11273 : /* No clobbers */);
11274 return result;
11277 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11278 vrsqrts_f32 (float32x2_t a, float32x2_t b)
11280 float32x2_t result;
11281 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
11282 : "=w"(result)
11283 : "w"(a), "w"(b)
11284 : /* No clobbers */);
11285 return result;
11288 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11289 vrsqrtsd_f64 (float64_t a, float64_t b)
11291 float64_t result;
11292 __asm__ ("frsqrts %d0,%d1,%d2"
11293 : "=w"(result)
11294 : "w"(a), "w"(b)
11295 : /* No clobbers */);
11296 return result;
11299 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11300 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
11302 float32x4_t result;
11303 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
11304 : "=w"(result)
11305 : "w"(a), "w"(b)
11306 : /* No clobbers */);
11307 return result;
11310 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11311 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
11313 float64x2_t result;
11314 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11315 : "=w"(result)
11316 : "w"(a), "w"(b)
11317 : /* No clobbers */);
11318 return result;
11321 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11322 vrsqrtss_f32 (float32_t a, float32_t b)
11324 float32_t result;
11325 __asm__ ("frsqrts %s0,%s1,%s2"
11326 : "=w"(result)
11327 : "w"(a), "w"(b)
11328 : /* No clobbers */);
11329 return result;
11332 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11333 vrsrtsq_f64 (float64x2_t a, float64x2_t b)
11335 float64x2_t result;
11336 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11337 : "=w"(result)
11338 : "w"(a), "w"(b)
11339 : /* No clobbers */);
11340 return result;
11343 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11344 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
11346 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11347 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11348 : "+w"(result)
11349 : "w"(b), "w"(c)
11350 : /* No clobbers */);
11351 return result;
11354 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11355 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
11357 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
11358 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11359 : "+w"(result)
11360 : "w"(b), "w"(c)
11361 : /* No clobbers */);
11362 return result;
11365 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11366 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
11368 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
11369 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11370 : "+w"(result)
11371 : "w"(b), "w"(c)
11372 : /* No clobbers */);
11373 return result;
11376 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11377 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
11379 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11380 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11381 : "+w"(result)
11382 : "w"(b), "w"(c)
11383 : /* No clobbers */);
11384 return result;
11387 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11388 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
11390 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11391 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11392 : "+w"(result)
11393 : "w"(b), "w"(c)
11394 : /* No clobbers */);
11395 return result;
11398 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11399 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
11401 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11402 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11403 : "+w"(result)
11404 : "w"(b), "w"(c)
11405 : /* No clobbers */);
11406 return result;
11409 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11410 vrsubhn_s16 (int16x8_t a, int16x8_t b)
11412 int8x8_t result;
11413 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11414 : "=w"(result)
11415 : "w"(a), "w"(b)
11416 : /* No clobbers */);
11417 return result;
11420 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11421 vrsubhn_s32 (int32x4_t a, int32x4_t b)
11423 int16x4_t result;
11424 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11425 : "=w"(result)
11426 : "w"(a), "w"(b)
11427 : /* No clobbers */);
11428 return result;
11431 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11432 vrsubhn_s64 (int64x2_t a, int64x2_t b)
11434 int32x2_t result;
11435 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11436 : "=w"(result)
11437 : "w"(a), "w"(b)
11438 : /* No clobbers */);
11439 return result;
11442 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11443 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
11445 uint8x8_t result;
11446 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11447 : "=w"(result)
11448 : "w"(a), "w"(b)
11449 : /* No clobbers */);
11450 return result;
11453 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11454 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
11456 uint16x4_t result;
11457 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11458 : "=w"(result)
11459 : "w"(a), "w"(b)
11460 : /* No clobbers */);
11461 return result;
11464 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11465 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
11467 uint32x2_t result;
11468 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11469 : "=w"(result)
11470 : "w"(a), "w"(b)
11471 : /* No clobbers */);
11472 return result;
/* vset_lane family (64-bit vectors): insert scalar A into lane C of
   vector B (INS from a general register).  Macros because the lane
   index C must be an assemble-time immediate; "0"(b_) ties the
   incoming vector to the output so untouched lanes are preserved.  */

#define vset_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x1_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t a_ = (a);                                                \
       poly8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t a_ = (a);                                                 \
       int8x8_t result;                                                 \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x4_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x2_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x1_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t a_ = (a);                                                \
       uint8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16_t a_ = (a);                                               \
       uint16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32_t a_ = (a);                                               \
       uint32x2_t result;                                               \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64_t a_ = (a);                                               \
       uint64x1_t result;                                               \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsetq_lane family (128-bit vectors): insert scalar A into lane C of
   vector B (INS).  Same immediate-lane / tied-operand scheme as
   vset_lane above.  */

#define vsetq_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t a_ = (a);                                                \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t a_ = (a);                                                 \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t a_ = (a);                                                \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t a_ = (a);                                               \
       uint16x8_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t a_ = (a);                                               \
       uint32x4_t result;                                               \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t a_ = (a);                                               \
       uint64x2_t result;                                               \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vshrn_high_n family: truncating shift-right-narrow by immediate C of
   wide vector B into the HIGH half of the result (SHRN2); the low half
   is the existing narrow vector A.  Same layout as vrshrn_high_n but
   without rounding.  */

#define vshrn_high_n_s16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vshrn_n family: truncating shift-right-narrow by immediate B
   (SHRN), halving the element width.  */

#define vshrn_n_s16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsli_n / vsliq_n (polynomial forms): shift left B by immediate C and
   insert into A, preserving the bits of A below the shift (SLI).
   "0"(a_) ties A to the output register so those bits survive.  */

#define vsli_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sli %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsli_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sli %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sli %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sli %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsri_n / vsriq_n (polynomial forms): shift right B by immediate C
   and insert into A, preserving the bits of A above the shift (SRI).
   Same tied-operand scheme as vsli_n above.  */

#define vsri_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sri %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsri_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sri %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sri %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sri %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vst1_lane family (64-bit vectors): store lane C of vector B to the
   address in A (ST1 single-structure form).  No output operands; the
   "memory" clobber orders the store against surrounding accesses.  */

#define vst1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
/* vst1q_lane_<type> (a, b, c): store lane C of the 128-bit vector B to
   the address A (ST1, single-structure form).  C must be a constant
   lane index valid for the element type.  Produces no value; "memory"
   is clobbered because the asm writes through A.  */
#define vst1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
12318 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12319 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
12321 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
12322 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12323 : "+w"(result)
12324 : "w"(b), "w"(c)
12325 : /* No clobbers */);
12326 return result;
12329 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12330 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
12332 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
12333 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12334 : "+w"(result)
12335 : "w"(b), "w"(c)
12336 : /* No clobbers */);
12337 return result;
12340 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12341 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
12343 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
12344 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12345 : "+w"(result)
12346 : "w"(b), "w"(c)
12347 : /* No clobbers */);
12348 return result;
12351 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12352 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
12354 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
12355 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12356 : "+w"(result)
12357 : "w"(b), "w"(c)
12358 : /* No clobbers */);
12359 return result;
12362 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12363 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
12365 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
12366 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12367 : "+w"(result)
12368 : "w"(b), "w"(c)
12369 : /* No clobbers */);
12370 return result;
12373 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12374 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
12376 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
12377 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12378 : "+w"(result)
12379 : "w"(b), "w"(c)
12380 : /* No clobbers */);
12381 return result;
12384 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12385 vsubhn_s16 (int16x8_t a, int16x8_t b)
12387 int8x8_t result;
12388 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12389 : "=w"(result)
12390 : "w"(a), "w"(b)
12391 : /* No clobbers */);
12392 return result;
12395 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12396 vsubhn_s32 (int32x4_t a, int32x4_t b)
12398 int16x4_t result;
12399 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12400 : "=w"(result)
12401 : "w"(a), "w"(b)
12402 : /* No clobbers */);
12403 return result;
12406 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12407 vsubhn_s64 (int64x2_t a, int64x2_t b)
12409 int32x2_t result;
12410 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12411 : "=w"(result)
12412 : "w"(a), "w"(b)
12413 : /* No clobbers */);
12414 return result;
12417 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12418 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
12420 uint8x8_t result;
12421 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12422 : "=w"(result)
12423 : "w"(a), "w"(b)
12424 : /* No clobbers */);
12425 return result;
12428 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12429 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
12431 uint16x4_t result;
12432 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12433 : "=w"(result)
12434 : "w"(a), "w"(b)
12435 : /* No clobbers */);
12436 return result;
12439 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12440 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
12442 uint32x2_t result;
12443 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12444 : "=w"(result)
12445 : "w"(a), "w"(b)
12446 : /* No clobbers */);
12447 return result;
12450 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12451 vtrn1_f32 (float32x2_t a, float32x2_t b)
12453 float32x2_t result;
12454 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12455 : "=w"(result)
12456 : "w"(a), "w"(b)
12457 : /* No clobbers */);
12458 return result;
12461 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12462 vtrn1_p8 (poly8x8_t a, poly8x8_t b)
12464 poly8x8_t result;
12465 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12466 : "=w"(result)
12467 : "w"(a), "w"(b)
12468 : /* No clobbers */);
12469 return result;
12472 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12473 vtrn1_p16 (poly16x4_t a, poly16x4_t b)
12475 poly16x4_t result;
12476 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12477 : "=w"(result)
12478 : "w"(a), "w"(b)
12479 : /* No clobbers */);
12480 return result;
12483 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12484 vtrn1_s8 (int8x8_t a, int8x8_t b)
12486 int8x8_t result;
12487 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12488 : "=w"(result)
12489 : "w"(a), "w"(b)
12490 : /* No clobbers */);
12491 return result;
12494 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12495 vtrn1_s16 (int16x4_t a, int16x4_t b)
12497 int16x4_t result;
12498 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12499 : "=w"(result)
12500 : "w"(a), "w"(b)
12501 : /* No clobbers */);
12502 return result;
12505 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12506 vtrn1_s32 (int32x2_t a, int32x2_t b)
12508 int32x2_t result;
12509 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12510 : "=w"(result)
12511 : "w"(a), "w"(b)
12512 : /* No clobbers */);
12513 return result;
12516 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12517 vtrn1_u8 (uint8x8_t a, uint8x8_t b)
12519 uint8x8_t result;
12520 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12521 : "=w"(result)
12522 : "w"(a), "w"(b)
12523 : /* No clobbers */);
12524 return result;
12527 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12528 vtrn1_u16 (uint16x4_t a, uint16x4_t b)
12530 uint16x4_t result;
12531 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12532 : "=w"(result)
12533 : "w"(a), "w"(b)
12534 : /* No clobbers */);
12535 return result;
12538 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12539 vtrn1_u32 (uint32x2_t a, uint32x2_t b)
12541 uint32x2_t result;
12542 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12543 : "=w"(result)
12544 : "w"(a), "w"(b)
12545 : /* No clobbers */);
12546 return result;
12549 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12550 vtrn1q_f32 (float32x4_t a, float32x4_t b)
12552 float32x4_t result;
12553 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12554 : "=w"(result)
12555 : "w"(a), "w"(b)
12556 : /* No clobbers */);
12557 return result;
12560 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12561 vtrn1q_f64 (float64x2_t a, float64x2_t b)
12563 float64x2_t result;
12564 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12565 : "=w"(result)
12566 : "w"(a), "w"(b)
12567 : /* No clobbers */);
12568 return result;
12571 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12572 vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
12574 poly8x16_t result;
12575 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12576 : "=w"(result)
12577 : "w"(a), "w"(b)
12578 : /* No clobbers */);
12579 return result;
12582 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12583 vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
12585 poly16x8_t result;
12586 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12587 : "=w"(result)
12588 : "w"(a), "w"(b)
12589 : /* No clobbers */);
12590 return result;
12593 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12594 vtrn1q_s8 (int8x16_t a, int8x16_t b)
12596 int8x16_t result;
12597 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12598 : "=w"(result)
12599 : "w"(a), "w"(b)
12600 : /* No clobbers */);
12601 return result;
12604 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12605 vtrn1q_s16 (int16x8_t a, int16x8_t b)
12607 int16x8_t result;
12608 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12609 : "=w"(result)
12610 : "w"(a), "w"(b)
12611 : /* No clobbers */);
12612 return result;
12615 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12616 vtrn1q_s32 (int32x4_t a, int32x4_t b)
12618 int32x4_t result;
12619 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12620 : "=w"(result)
12621 : "w"(a), "w"(b)
12622 : /* No clobbers */);
12623 return result;
12626 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12627 vtrn1q_s64 (int64x2_t a, int64x2_t b)
12629 int64x2_t result;
12630 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12631 : "=w"(result)
12632 : "w"(a), "w"(b)
12633 : /* No clobbers */);
12634 return result;
12637 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12638 vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
12640 uint8x16_t result;
12641 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12642 : "=w"(result)
12643 : "w"(a), "w"(b)
12644 : /* No clobbers */);
12645 return result;
12648 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12649 vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
12651 uint16x8_t result;
12652 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12653 : "=w"(result)
12654 : "w"(a), "w"(b)
12655 : /* No clobbers */);
12656 return result;
12659 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12660 vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
12662 uint32x4_t result;
12663 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12664 : "=w"(result)
12665 : "w"(a), "w"(b)
12666 : /* No clobbers */);
12667 return result;
12670 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12671 vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
12673 uint64x2_t result;
12674 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12675 : "=w"(result)
12676 : "w"(a), "w"(b)
12677 : /* No clobbers */);
12678 return result;
12681 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12682 vtrn2_f32 (float32x2_t a, float32x2_t b)
12684 float32x2_t result;
12685 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12686 : "=w"(result)
12687 : "w"(a), "w"(b)
12688 : /* No clobbers */);
12689 return result;
12692 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12693 vtrn2_p8 (poly8x8_t a, poly8x8_t b)
12695 poly8x8_t result;
12696 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12697 : "=w"(result)
12698 : "w"(a), "w"(b)
12699 : /* No clobbers */);
12700 return result;
12703 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12704 vtrn2_p16 (poly16x4_t a, poly16x4_t b)
12706 poly16x4_t result;
12707 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12708 : "=w"(result)
12709 : "w"(a), "w"(b)
12710 : /* No clobbers */);
12711 return result;
12714 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12715 vtrn2_s8 (int8x8_t a, int8x8_t b)
12717 int8x8_t result;
12718 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12719 : "=w"(result)
12720 : "w"(a), "w"(b)
12721 : /* No clobbers */);
12722 return result;
12725 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12726 vtrn2_s16 (int16x4_t a, int16x4_t b)
12728 int16x4_t result;
12729 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12730 : "=w"(result)
12731 : "w"(a), "w"(b)
12732 : /* No clobbers */);
12733 return result;
12736 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12737 vtrn2_s32 (int32x2_t a, int32x2_t b)
12739 int32x2_t result;
12740 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12741 : "=w"(result)
12742 : "w"(a), "w"(b)
12743 : /* No clobbers */);
12744 return result;
12747 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12748 vtrn2_u8 (uint8x8_t a, uint8x8_t b)
12750 uint8x8_t result;
12751 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12752 : "=w"(result)
12753 : "w"(a), "w"(b)
12754 : /* No clobbers */);
12755 return result;
12758 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12759 vtrn2_u16 (uint16x4_t a, uint16x4_t b)
12761 uint16x4_t result;
12762 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12763 : "=w"(result)
12764 : "w"(a), "w"(b)
12765 : /* No clobbers */);
12766 return result;
12769 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12770 vtrn2_u32 (uint32x2_t a, uint32x2_t b)
12772 uint32x2_t result;
12773 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12774 : "=w"(result)
12775 : "w"(a), "w"(b)
12776 : /* No clobbers */);
12777 return result;
12780 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12781 vtrn2q_f32 (float32x4_t a, float32x4_t b)
12783 float32x4_t result;
12784 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
12785 : "=w"(result)
12786 : "w"(a), "w"(b)
12787 : /* No clobbers */);
12788 return result;
12791 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12792 vtrn2q_f64 (float64x2_t a, float64x2_t b)
12794 float64x2_t result;
12795 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
12796 : "=w"(result)
12797 : "w"(a), "w"(b)
12798 : /* No clobbers */);
12799 return result;
12802 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12803 vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
12805 poly8x16_t result;
12806 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
12807 : "=w"(result)
12808 : "w"(a), "w"(b)
12809 : /* No clobbers */);
12810 return result;
12813 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12814 vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
12816 poly16x8_t result;
12817 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
12818 : "=w"(result)
12819 : "w"(a), "w"(b)
12820 : /* No clobbers */);
12821 return result;
12824 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12825 vtrn2q_s8 (int8x16_t a, int8x16_t b)
12827 int8x16_t result;
12828 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
12829 : "=w"(result)
12830 : "w"(a), "w"(b)
12831 : /* No clobbers */);
12832 return result;
12835 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12836 vtrn2q_s16 (int16x8_t a, int16x8_t b)
12838 int16x8_t result;
12839 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
12840 : "=w"(result)
12841 : "w"(a), "w"(b)
12842 : /* No clobbers */);
12843 return result;
12846 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12847 vtrn2q_s32 (int32x4_t a, int32x4_t b)
12849 int32x4_t result;
12850 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
12851 : "=w"(result)
12852 : "w"(a), "w"(b)
12853 : /* No clobbers */);
12854 return result;
12857 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12858 vtrn2q_s64 (int64x2_t a, int64x2_t b)
12860 int64x2_t result;
12861 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
12862 : "=w"(result)
12863 : "w"(a), "w"(b)
12864 : /* No clobbers */);
12865 return result;
12868 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12869 vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
12871 uint8x16_t result;
12872 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
12873 : "=w"(result)
12874 : "w"(a), "w"(b)
12875 : /* No clobbers */);
12876 return result;
12879 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12880 vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
12882 uint16x8_t result;
12883 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
12884 : "=w"(result)
12885 : "w"(a), "w"(b)
12886 : /* No clobbers */);
12887 return result;
12890 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12891 vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
12893 uint32x4_t result;
12894 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
12895 : "=w"(result)
12896 : "w"(a), "w"(b)
12897 : /* No clobbers */);
12898 return result;
12901 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12902 vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
12904 uint64x2_t result;
12905 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
12906 : "=w"(result)
12907 : "w"(a), "w"(b)
12908 : /* No clobbers */);
12909 return result;
12912 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12913 vtst_p8 (poly8x8_t a, poly8x8_t b)
12915 uint8x8_t result;
12916 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
12917 : "=w"(result)
12918 : "w"(a), "w"(b)
12919 : /* No clobbers */);
12920 return result;
12923 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12924 vtst_p16 (poly16x4_t a, poly16x4_t b)
12926 uint16x4_t result;
12927 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
12928 : "=w"(result)
12929 : "w"(a), "w"(b)
12930 : /* No clobbers */);
12931 return result;
12934 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12935 vtstq_p8 (poly8x16_t a, poly8x16_t b)
12937 uint8x16_t result;
12938 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
12939 : "=w"(result)
12940 : "w"(a), "w"(b)
12941 : /* No clobbers */);
12942 return result;
12945 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12946 vtstq_p16 (poly16x8_t a, poly16x8_t b)
12948 uint16x8_t result;
12949 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
12950 : "=w"(result)
12951 : "w"(a), "w"(b)
12952 : /* No clobbers */);
12953 return result;
12955 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12956 vuzp1_f32 (float32x2_t a, float32x2_t b)
12958 float32x2_t result;
12959 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
12960 : "=w"(result)
12961 : "w"(a), "w"(b)
12962 : /* No clobbers */);
12963 return result;
12966 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12967 vuzp1_p8 (poly8x8_t a, poly8x8_t b)
12969 poly8x8_t result;
12970 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
12971 : "=w"(result)
12972 : "w"(a), "w"(b)
12973 : /* No clobbers */);
12974 return result;
12977 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12978 vuzp1_p16 (poly16x4_t a, poly16x4_t b)
12980 poly16x4_t result;
12981 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
12982 : "=w"(result)
12983 : "w"(a), "w"(b)
12984 : /* No clobbers */);
12985 return result;
12988 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12989 vuzp1_s8 (int8x8_t a, int8x8_t b)
12991 int8x8_t result;
12992 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
12993 : "=w"(result)
12994 : "w"(a), "w"(b)
12995 : /* No clobbers */);
12996 return result;
12999 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13000 vuzp1_s16 (int16x4_t a, int16x4_t b)
13002 int16x4_t result;
13003 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13004 : "=w"(result)
13005 : "w"(a), "w"(b)
13006 : /* No clobbers */);
13007 return result;
13010 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13011 vuzp1_s32 (int32x2_t a, int32x2_t b)
13013 int32x2_t result;
13014 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13015 : "=w"(result)
13016 : "w"(a), "w"(b)
13017 : /* No clobbers */);
13018 return result;
13021 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13022 vuzp1_u8 (uint8x8_t a, uint8x8_t b)
13024 uint8x8_t result;
13025 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
13026 : "=w"(result)
13027 : "w"(a), "w"(b)
13028 : /* No clobbers */);
13029 return result;
13032 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13033 vuzp1_u16 (uint16x4_t a, uint16x4_t b)
13035 uint16x4_t result;
13036 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13037 : "=w"(result)
13038 : "w"(a), "w"(b)
13039 : /* No clobbers */);
13040 return result;
13043 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13044 vuzp1_u32 (uint32x2_t a, uint32x2_t b)
13046 uint32x2_t result;
13047 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13048 : "=w"(result)
13049 : "w"(a), "w"(b)
13050 : /* No clobbers */);
13051 return result;
13054 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13055 vuzp1q_f32 (float32x4_t a, float32x4_t b)
13057 float32x4_t result;
13058 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13059 : "=w"(result)
13060 : "w"(a), "w"(b)
13061 : /* No clobbers */);
13062 return result;
13065 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13066 vuzp1q_f64 (float64x2_t a, float64x2_t b)
13068 float64x2_t result;
13069 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13070 : "=w"(result)
13071 : "w"(a), "w"(b)
13072 : /* No clobbers */);
13073 return result;
13076 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13077 vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
13079 poly8x16_t result;
13080 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13081 : "=w"(result)
13082 : "w"(a), "w"(b)
13083 : /* No clobbers */);
13084 return result;
13087 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13088 vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
13090 poly16x8_t result;
13091 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13092 : "=w"(result)
13093 : "w"(a), "w"(b)
13094 : /* No clobbers */);
13095 return result;
13098 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13099 vuzp1q_s8 (int8x16_t a, int8x16_t b)
13101 int8x16_t result;
13102 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13103 : "=w"(result)
13104 : "w"(a), "w"(b)
13105 : /* No clobbers */);
13106 return result;
13109 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13110 vuzp1q_s16 (int16x8_t a, int16x8_t b)
13112 int16x8_t result;
13113 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13114 : "=w"(result)
13115 : "w"(a), "w"(b)
13116 : /* No clobbers */);
13117 return result;
13120 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13121 vuzp1q_s32 (int32x4_t a, int32x4_t b)
13123 int32x4_t result;
13124 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13125 : "=w"(result)
13126 : "w"(a), "w"(b)
13127 : /* No clobbers */);
13128 return result;
13131 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13132 vuzp1q_s64 (int64x2_t a, int64x2_t b)
13134 int64x2_t result;
13135 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13136 : "=w"(result)
13137 : "w"(a), "w"(b)
13138 : /* No clobbers */);
13139 return result;
13142 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13143 vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
13145 uint8x16_t result;
13146 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13147 : "=w"(result)
13148 : "w"(a), "w"(b)
13149 : /* No clobbers */);
13150 return result;
13153 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13154 vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
13156 uint16x8_t result;
13157 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13158 : "=w"(result)
13159 : "w"(a), "w"(b)
13160 : /* No clobbers */);
13161 return result;
13164 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13165 vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
13167 uint32x4_t result;
13168 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13169 : "=w"(result)
13170 : "w"(a), "w"(b)
13171 : /* No clobbers */);
13172 return result;
13175 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13176 vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
13178 uint64x2_t result;
13179 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13180 : "=w"(result)
13181 : "w"(a), "w"(b)
13182 : /* No clobbers */);
13183 return result;
13186 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13187 vuzp2_f32 (float32x2_t a, float32x2_t b)
13189 float32x2_t result;
13190 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13191 : "=w"(result)
13192 : "w"(a), "w"(b)
13193 : /* No clobbers */);
13194 return result;
13197 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13198 vuzp2_p8 (poly8x8_t a, poly8x8_t b)
13200 poly8x8_t result;
13201 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13202 : "=w"(result)
13203 : "w"(a), "w"(b)
13204 : /* No clobbers */);
13205 return result;
13208 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13209 vuzp2_p16 (poly16x4_t a, poly16x4_t b)
13211 poly16x4_t result;
13212 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13213 : "=w"(result)
13214 : "w"(a), "w"(b)
13215 : /* No clobbers */);
13216 return result;
13219 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13220 vuzp2_s8 (int8x8_t a, int8x8_t b)
13222 int8x8_t result;
13223 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13224 : "=w"(result)
13225 : "w"(a), "w"(b)
13226 : /* No clobbers */);
13227 return result;
13230 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13231 vuzp2_s16 (int16x4_t a, int16x4_t b)
13233 int16x4_t result;
13234 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13235 : "=w"(result)
13236 : "w"(a), "w"(b)
13237 : /* No clobbers */);
13238 return result;
13241 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13242 vuzp2_s32 (int32x2_t a, int32x2_t b)
13244 int32x2_t result;
13245 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13246 : "=w"(result)
13247 : "w"(a), "w"(b)
13248 : /* No clobbers */);
13249 return result;
13252 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13253 vuzp2_u8 (uint8x8_t a, uint8x8_t b)
13255 uint8x8_t result;
13256 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13257 : "=w"(result)
13258 : "w"(a), "w"(b)
13259 : /* No clobbers */);
13260 return result;
13263 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13264 vuzp2_u16 (uint16x4_t a, uint16x4_t b)
13266 uint16x4_t result;
13267 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13268 : "=w"(result)
13269 : "w"(a), "w"(b)
13270 : /* No clobbers */);
13271 return result;
13274 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13275 vuzp2_u32 (uint32x2_t a, uint32x2_t b)
13277 uint32x2_t result;
13278 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13279 : "=w"(result)
13280 : "w"(a), "w"(b)
13281 : /* No clobbers */);
13282 return result;
13285 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13286 vuzp2q_f32 (float32x4_t a, float32x4_t b)
13288 float32x4_t result;
13289 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13290 : "=w"(result)
13291 : "w"(a), "w"(b)
13292 : /* No clobbers */);
13293 return result;
13296 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13297 vuzp2q_f64 (float64x2_t a, float64x2_t b)
13299 float64x2_t result;
13300 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13301 : "=w"(result)
13302 : "w"(a), "w"(b)
13303 : /* No clobbers */);
13304 return result;
13307 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13308 vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
13310 poly8x16_t result;
13311 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13312 : "=w"(result)
13313 : "w"(a), "w"(b)
13314 : /* No clobbers */);
13315 return result;
13318 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13319 vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
13321 poly16x8_t result;
13322 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13323 : "=w"(result)
13324 : "w"(a), "w"(b)
13325 : /* No clobbers */);
13326 return result;
13329 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13330 vuzp2q_s8 (int8x16_t a, int8x16_t b)
13332 int8x16_t result;
13333 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13334 : "=w"(result)
13335 : "w"(a), "w"(b)
13336 : /* No clobbers */);
13337 return result;
13340 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13341 vuzp2q_s16 (int16x8_t a, int16x8_t b)
13343 int16x8_t result;
13344 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13345 : "=w"(result)
13346 : "w"(a), "w"(b)
13347 : /* No clobbers */);
13348 return result;
13351 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13352 vuzp2q_s32 (int32x4_t a, int32x4_t b)
13354 int32x4_t result;
13355 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13356 : "=w"(result)
13357 : "w"(a), "w"(b)
13358 : /* No clobbers */);
13359 return result;
13362 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13363 vuzp2q_s64 (int64x2_t a, int64x2_t b)
13365 int64x2_t result;
13366 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13367 : "=w"(result)
13368 : "w"(a), "w"(b)
13369 : /* No clobbers */);
13370 return result;
13373 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13374 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
13376 uint8x16_t result;
13377 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13378 : "=w"(result)
13379 : "w"(a), "w"(b)
13380 : /* No clobbers */);
13381 return result;
13384 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13385 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
13387 uint16x8_t result;
13388 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13389 : "=w"(result)
13390 : "w"(a), "w"(b)
13391 : /* No clobbers */);
13392 return result;
13395 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13396 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
13398 uint32x4_t result;
13399 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13400 : "=w"(result)
13401 : "w"(a), "w"(b)
13402 : /* No clobbers */);
13403 return result;
13406 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13407 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
13409 uint64x2_t result;
13410 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13411 : "=w"(result)
13412 : "w"(a), "w"(b)
13413 : /* No clobbers */);
13414 return result;
13417 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13418 vzip1_f32 (float32x2_t a, float32x2_t b)
13420 float32x2_t result;
13421 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13422 : "=w"(result)
13423 : "w"(a), "w"(b)
13424 : /* No clobbers */);
13425 return result;
13428 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13429 vzip1_p8 (poly8x8_t a, poly8x8_t b)
13431 poly8x8_t result;
13432 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13433 : "=w"(result)
13434 : "w"(a), "w"(b)
13435 : /* No clobbers */);
13436 return result;
13439 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13440 vzip1_p16 (poly16x4_t a, poly16x4_t b)
13442 poly16x4_t result;
13443 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13444 : "=w"(result)
13445 : "w"(a), "w"(b)
13446 : /* No clobbers */);
13447 return result;
13450 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13451 vzip1_s8 (int8x8_t a, int8x8_t b)
13453 int8x8_t result;
13454 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13455 : "=w"(result)
13456 : "w"(a), "w"(b)
13457 : /* No clobbers */);
13458 return result;
13461 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13462 vzip1_s16 (int16x4_t a, int16x4_t b)
13464 int16x4_t result;
13465 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13466 : "=w"(result)
13467 : "w"(a), "w"(b)
13468 : /* No clobbers */);
13469 return result;
13472 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13473 vzip1_s32 (int32x2_t a, int32x2_t b)
13475 int32x2_t result;
13476 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13477 : "=w"(result)
13478 : "w"(a), "w"(b)
13479 : /* No clobbers */);
13480 return result;
13483 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13484 vzip1_u8 (uint8x8_t a, uint8x8_t b)
13486 uint8x8_t result;
13487 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13488 : "=w"(result)
13489 : "w"(a), "w"(b)
13490 : /* No clobbers */);
13491 return result;
13494 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13495 vzip1_u16 (uint16x4_t a, uint16x4_t b)
13497 uint16x4_t result;
13498 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13499 : "=w"(result)
13500 : "w"(a), "w"(b)
13501 : /* No clobbers */);
13502 return result;
13505 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13506 vzip1_u32 (uint32x2_t a, uint32x2_t b)
13508 uint32x2_t result;
13509 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13510 : "=w"(result)
13511 : "w"(a), "w"(b)
13512 : /* No clobbers */);
13513 return result;
13516 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13517 vzip1q_f32 (float32x4_t a, float32x4_t b)
13519 float32x4_t result;
13520 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
13521 : "=w"(result)
13522 : "w"(a), "w"(b)
13523 : /* No clobbers */);
13524 return result;
13527 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13528 vzip1q_f64 (float64x2_t a, float64x2_t b)
13530 float64x2_t result;
13531 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
13532 : "=w"(result)
13533 : "w"(a), "w"(b)
13534 : /* No clobbers */);
13535 return result;
13538 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13539 vzip1q_p8 (poly8x16_t a, poly8x16_t b)
13541 poly8x16_t result;
13542 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
13543 : "=w"(result)
13544 : "w"(a), "w"(b)
13545 : /* No clobbers */);
13546 return result;
13549 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13550 vzip1q_p16 (poly16x8_t a, poly16x8_t b)
13552 poly16x8_t result;
13553 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
13554 : "=w"(result)
13555 : "w"(a), "w"(b)
13556 : /* No clobbers */);
13557 return result;
13560 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13561 vzip1q_s8 (int8x16_t a, int8x16_t b)
13563 int8x16_t result;
13564 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
13565 : "=w"(result)
13566 : "w"(a), "w"(b)
13567 : /* No clobbers */);
13568 return result;
13571 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13572 vzip1q_s16 (int16x8_t a, int16x8_t b)
13574 int16x8_t result;
13575 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
13576 : "=w"(result)
13577 : "w"(a), "w"(b)
13578 : /* No clobbers */);
13579 return result;
13582 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13583 vzip1q_s32 (int32x4_t a, int32x4_t b)
13585 int32x4_t result;
13586 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
13587 : "=w"(result)
13588 : "w"(a), "w"(b)
13589 : /* No clobbers */);
13590 return result;
13593 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13594 vzip1q_s64 (int64x2_t a, int64x2_t b)
13596 int64x2_t result;
13597 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
13598 : "=w"(result)
13599 : "w"(a), "w"(b)
13600 : /* No clobbers */);
13601 return result;
13604 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13605 vzip1q_u8 (uint8x16_t a, uint8x16_t b)
13607 uint8x16_t result;
13608 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
13609 : "=w"(result)
13610 : "w"(a), "w"(b)
13611 : /* No clobbers */);
13612 return result;
13615 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13616 vzip1q_u16 (uint16x8_t a, uint16x8_t b)
13618 uint16x8_t result;
13619 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
13620 : "=w"(result)
13621 : "w"(a), "w"(b)
13622 : /* No clobbers */);
13623 return result;
13626 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13627 vzip1q_u32 (uint32x4_t a, uint32x4_t b)
13629 uint32x4_t result;
13630 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
13631 : "=w"(result)
13632 : "w"(a), "w"(b)
13633 : /* No clobbers */);
13634 return result;
13637 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13638 vzip1q_u64 (uint64x2_t a, uint64x2_t b)
13640 uint64x2_t result;
13641 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
13642 : "=w"(result)
13643 : "w"(a), "w"(b)
13644 : /* No clobbers */);
13645 return result;
13648 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13649 vzip2_f32 (float32x2_t a, float32x2_t b)
13651 float32x2_t result;
13652 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
13653 : "=w"(result)
13654 : "w"(a), "w"(b)
13655 : /* No clobbers */);
13656 return result;
13659 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13660 vzip2_p8 (poly8x8_t a, poly8x8_t b)
13662 poly8x8_t result;
13663 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
13664 : "=w"(result)
13665 : "w"(a), "w"(b)
13666 : /* No clobbers */);
13667 return result;
13670 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13671 vzip2_p16 (poly16x4_t a, poly16x4_t b)
13673 poly16x4_t result;
13674 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
13675 : "=w"(result)
13676 : "w"(a), "w"(b)
13677 : /* No clobbers */);
13678 return result;
13681 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13682 vzip2_s8 (int8x8_t a, int8x8_t b)
13684 int8x8_t result;
13685 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
13686 : "=w"(result)
13687 : "w"(a), "w"(b)
13688 : /* No clobbers */);
13689 return result;
13692 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13693 vzip2_s16 (int16x4_t a, int16x4_t b)
13695 int16x4_t result;
13696 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
13697 : "=w"(result)
13698 : "w"(a), "w"(b)
13699 : /* No clobbers */);
13700 return result;
13703 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13704 vzip2_s32 (int32x2_t a, int32x2_t b)
13706 int32x2_t result;
13707 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
13708 : "=w"(result)
13709 : "w"(a), "w"(b)
13710 : /* No clobbers */);
13711 return result;
13714 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13715 vzip2_u8 (uint8x8_t a, uint8x8_t b)
13717 uint8x8_t result;
13718 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
13719 : "=w"(result)
13720 : "w"(a), "w"(b)
13721 : /* No clobbers */);
13722 return result;
13725 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13726 vzip2_u16 (uint16x4_t a, uint16x4_t b)
13728 uint16x4_t result;
13729 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
13730 : "=w"(result)
13731 : "w"(a), "w"(b)
13732 : /* No clobbers */);
13733 return result;
13736 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13737 vzip2_u32 (uint32x2_t a, uint32x2_t b)
13739 uint32x2_t result;
13740 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
13741 : "=w"(result)
13742 : "w"(a), "w"(b)
13743 : /* No clobbers */);
13744 return result;
13747 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13748 vzip2q_f32 (float32x4_t a, float32x4_t b)
13750 float32x4_t result;
13751 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
13752 : "=w"(result)
13753 : "w"(a), "w"(b)
13754 : /* No clobbers */);
13755 return result;
13758 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13759 vzip2q_f64 (float64x2_t a, float64x2_t b)
13761 float64x2_t result;
13762 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
13763 : "=w"(result)
13764 : "w"(a), "w"(b)
13765 : /* No clobbers */);
13766 return result;
13769 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13770 vzip2q_p8 (poly8x16_t a, poly8x16_t b)
13772 poly8x16_t result;
13773 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
13774 : "=w"(result)
13775 : "w"(a), "w"(b)
13776 : /* No clobbers */);
13777 return result;
13780 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13781 vzip2q_p16 (poly16x8_t a, poly16x8_t b)
13783 poly16x8_t result;
13784 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
13785 : "=w"(result)
13786 : "w"(a), "w"(b)
13787 : /* No clobbers */);
13788 return result;
13791 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13792 vzip2q_s8 (int8x16_t a, int8x16_t b)
13794 int8x16_t result;
13795 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
13796 : "=w"(result)
13797 : "w"(a), "w"(b)
13798 : /* No clobbers */);
13799 return result;
13802 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13803 vzip2q_s16 (int16x8_t a, int16x8_t b)
13805 int16x8_t result;
13806 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
13807 : "=w"(result)
13808 : "w"(a), "w"(b)
13809 : /* No clobbers */);
13810 return result;
13813 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13814 vzip2q_s32 (int32x4_t a, int32x4_t b)
13816 int32x4_t result;
13817 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
13818 : "=w"(result)
13819 : "w"(a), "w"(b)
13820 : /* No clobbers */);
13821 return result;
13824 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13825 vzip2q_s64 (int64x2_t a, int64x2_t b)
13827 int64x2_t result;
13828 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
13829 : "=w"(result)
13830 : "w"(a), "w"(b)
13831 : /* No clobbers */);
13832 return result;
13835 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13836 vzip2q_u8 (uint8x16_t a, uint8x16_t b)
13838 uint8x16_t result;
13839 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
13840 : "=w"(result)
13841 : "w"(a), "w"(b)
13842 : /* No clobbers */);
13843 return result;
13846 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13847 vzip2q_u16 (uint16x8_t a, uint16x8_t b)
13849 uint16x8_t result;
13850 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
13851 : "=w"(result)
13852 : "w"(a), "w"(b)
13853 : /* No clobbers */);
13854 return result;
13857 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13858 vzip2q_u32 (uint32x4_t a, uint32x4_t b)
13860 uint32x4_t result;
13861 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
13862 : "=w"(result)
13863 : "w"(a), "w"(b)
13864 : /* No clobbers */);
13865 return result;
13868 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13869 vzip2q_u64 (uint64x2_t a, uint64x2_t b)
13871 uint64x2_t result;
13872 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
13873 : "=w"(result)
13874 : "w"(a), "w"(b)
13875 : /* No clobbers */);
13876 return result;
13879 /* End of temporary inline asm implementations. */
13881 /* Start of temporary inline asm for vldn, vstn and friends. */
13883 /* Create struct element types for duplicating loads.
13885 Create 2 element structures of:
13887 +------+----+----+----+----+
13888 | | 8 | 16 | 32 | 64 |
13889 +------+----+----+----+----+
13890 |int | Y | Y | N | N |
13891 +------+----+----+----+----+
13892 |uint | Y | Y | N | N |
13893 +------+----+----+----+----+
13894 |float | - | - | N | N |
13895 +------+----+----+----+----+
13896 |poly | Y | Y | - | - |
13897 +------+----+----+----+----+
13899 Create 3 element structures of:
13901 +------+----+----+----+----+
13902 | | 8 | 16 | 32 | 64 |
13903 +------+----+----+----+----+
13904 |int | Y | Y | Y | Y |
13905 +------+----+----+----+----+
13906 |uint | Y | Y | Y | Y |
13907 +------+----+----+----+----+
13908 |float | - | - | Y | Y |
13909 +------+----+----+----+----+
13910 |poly | Y | Y | - | - |
13911 +------+----+----+----+----+
13913 Create 4 element structures of:
13915 +------+----+----+----+----+
13916 | | 8 | 16 | 32 | 64 |
13917 +------+----+----+----+----+
13918 |int | Y | N | N | Y |
13919 +------+----+----+----+----+
13920 |uint | Y | N | N | Y |
13921 +------+----+----+----+----+
13922 |float | - | - | N | Y |
13923 +------+----+----+----+----+
13924 |poly | Y | N | - | - |
13925 +------+----+----+----+----+
13927 This is required for casting memory reference. */
13928 #define __STRUCTN(t, sz, nelem) \
13929 typedef struct t ## sz ## x ## nelem ## _t { \
13930 t ## sz ## _t val[nelem]; \
13931 } t ## sz ## x ## nelem ## _t;
13933 /* 2-element structs. */
13934 __STRUCTN (int, 8, 2)
13935 __STRUCTN (int, 16, 2)
13936 __STRUCTN (uint, 8, 2)
13937 __STRUCTN (uint, 16, 2)
13938 __STRUCTN (poly, 8, 2)
13939 __STRUCTN (poly, 16, 2)
13940 /* 3-element structs. */
13941 __STRUCTN (int, 8, 3)
13942 __STRUCTN (int, 16, 3)
13943 __STRUCTN (int, 32, 3)
13944 __STRUCTN (int, 64, 3)
13945 __STRUCTN (uint, 8, 3)
13946 __STRUCTN (uint, 16, 3)
13947 __STRUCTN (uint, 32, 3)
13948 __STRUCTN (uint, 64, 3)
13949 __STRUCTN (float, 32, 3)
13950 __STRUCTN (float, 64, 3)
13951 __STRUCTN (poly, 8, 3)
13952 __STRUCTN (poly, 16, 3)
13953 /* 4-element structs. */
13954 __STRUCTN (int, 8, 4)
13955 __STRUCTN (int, 64, 4)
13956 __STRUCTN (uint, 8, 4)
13957 __STRUCTN (uint, 64, 4)
13958 __STRUCTN (poly, 8, 4)
13959 __STRUCTN (float, 64, 4)
13960 #undef __STRUCTN
13962 #define __LD2R_FUNC(rettype, structtype, ptrtype, \
13963 regsuffix, funcsuffix, Q) \
13964 __extension__ static __inline rettype \
13965 __attribute__ ((__always_inline__)) \
13966 vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
13968 rettype result; \
13969 __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
13970 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
13971 : "=Q"(result) \
13972 : "Q"(*(const structtype *)ptr) \
13973 : "memory", "v16", "v17"); \
13974 return result; \
13977 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
13978 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
13979 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
13980 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
13981 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
13982 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
13983 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
13984 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
13985 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
13986 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
13987 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
13988 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
13989 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
13990 __LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
13991 __LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
13992 __LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
13993 __LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
13994 __LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
13995 __LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
13996 __LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
13997 __LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
13998 __LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
13999 __LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
14000 __LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
14002 #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
14003 lnsuffix, funcsuffix, Q) \
14004 __extension__ static __inline rettype \
14005 __attribute__ ((__always_inline__)) \
14006 vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
14007 rettype b, const int c) \
14009 rettype result; \
14010 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
14011 "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
14012 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
14013 : "=Q"(result) \
14014 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
14015 : "memory", "v16", "v17"); \
14016 return result; \
14019 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
14020 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
14021 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
14022 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
14023 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
14024 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
14025 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
14026 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
14027 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
14028 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
14029 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
14030 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
14031 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
14032 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
14033 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
14034 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
14035 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
14036 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
14037 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
14038 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
14039 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
14040 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
14041 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
14042 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
14044 #define __LD3R_FUNC(rettype, structtype, ptrtype, \
14045 regsuffix, funcsuffix, Q) \
14046 __extension__ static __inline rettype \
14047 __attribute__ ((__always_inline__)) \
14048 vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
14050 rettype result; \
14051 __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
14052 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
14053 : "=Q"(result) \
14054 : "Q"(*(const structtype *)ptr) \
14055 : "memory", "v16", "v17", "v18"); \
14056 return result; \
14059 __LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
14060 __LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
14061 __LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
14062 __LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
14063 __LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
14064 __LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
14065 __LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
14066 __LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
14067 __LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
14068 __LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
14069 __LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
14070 __LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
14071 __LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
14072 __LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
14073 __LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
14074 __LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
14075 __LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
14076 __LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
14077 __LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
14078 __LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
14079 __LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
14080 __LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
14081 __LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
14082 __LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
14084 #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
14085 lnsuffix, funcsuffix, Q) \
14086 __extension__ static __inline rettype \
14087 __attribute__ ((__always_inline__)) \
14088 vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
14089 rettype b, const int c) \
14091 rettype result; \
14092 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
14093 "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
14094 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
14095 : "=Q"(result) \
14096 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
14097 : "memory", "v16", "v17", "v18"); \
14098 return result; \
14101 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
14102 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
14103 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
14104 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
14105 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
14106 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
14107 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
14108 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
14109 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
14110 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
14111 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
14112 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
14113 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
14114 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
14115 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
14116 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
14117 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
14118 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
14119 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
14120 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
14121 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
14122 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
14123 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
14124 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
14126 #define __LD4R_FUNC(rettype, structtype, ptrtype, \
14127 regsuffix, funcsuffix, Q) \
14128 __extension__ static __inline rettype \
14129 __attribute__ ((__always_inline__)) \
14130 vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
14132 rettype result; \
14133 __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
14134 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
14135 : "=Q"(result) \
14136 : "Q"(*(const structtype *)ptr) \
14137 : "memory", "v16", "v17", "v18", "v19"); \
14138 return result; \
14141 __LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
14142 __LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
14143 __LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
14144 __LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
14145 __LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
14146 __LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
14147 __LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
14148 __LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
14149 __LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
14150 __LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
14151 __LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
14152 __LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
14153 __LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
14154 __LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
14155 __LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
14156 __LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
14157 __LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
14158 __LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
14159 __LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
14160 __LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
14161 __LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
14162 __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
14163 __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
14164 __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
14166 #define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
14167 lnsuffix, funcsuffix, Q) \
14168 __extension__ static __inline rettype \
14169 __attribute__ ((__always_inline__)) \
14170 vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
14171 rettype b, const int c) \
14173 rettype result; \
14174 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
14175 "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
14176 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
14177 : "=Q"(result) \
14178 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
14179 : "memory", "v16", "v17", "v18", "v19"); \
14180 return result; \
14183 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
14184 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
14185 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
14186 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
14187 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
14188 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
14189 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
14190 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
14191 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
14192 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
14193 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
14194 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
14195 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
14196 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
14197 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
14198 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
14199 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
14200 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
14201 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
14202 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
14203 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
14204 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
14205 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
14206 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
14208 #define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
14209 lnsuffix, funcsuffix, Q) \
14210 typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \
14211 __extension__ static __inline void \
14212 __attribute__ ((__always_inline__)) \
14213 vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
14214 intype b, const int c) \
14216 __ST2_LANE_STRUCTURE_##intype *__p = \
14217 (__ST2_LANE_STRUCTURE_##intype *)ptr; \
14218 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
14219 "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
14220 : "=Q"(*__p) \
14221 : "Q"(b), "i"(c) \
14222 : "v16", "v17"); \
14225 __ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
14226 __ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
14227 __ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
14228 __ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
14229 __ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
14230 __ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
14231 __ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
14232 __ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
14233 __ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
14234 __ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
14235 __ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
14236 __ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
14237 __ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
14238 __ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
14239 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
14240 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
14241 __ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
14242 __ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
14243 __ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
14244 __ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
14245 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
14246 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
14247 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
14248 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
14250 #define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
14251 lnsuffix, funcsuffix, Q) \
14252 typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \
14253 __extension__ static __inline void \
14254 __attribute__ ((__always_inline__)) \
14255 vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
14256 intype b, const int c) \
14258 __ST3_LANE_STRUCTURE_##intype *__p = \
14259 (__ST3_LANE_STRUCTURE_##intype *)ptr; \
14260 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
14261 "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
14262 : "=Q"(*__p) \
14263 : "Q"(b), "i"(c) \
14264 : "v16", "v17", "v18"); \
14267 __ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
14268 __ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
14269 __ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
14270 __ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
14271 __ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
14272 __ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
14273 __ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
14274 __ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
14275 __ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
14276 __ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
14277 __ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
14278 __ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
14279 __ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
14280 __ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
14281 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
14282 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
14283 __ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
14284 __ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
14285 __ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
14286 __ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
14287 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
14288 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
14289 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
14290 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* Template for a vst4[q]_lane_<funcsuffix> intrinsic.  It first defines
   a POD wrapper struct so the four-vector aggregate B can be passed to
   the asm through memory (the "Q" input); the asm then LD1-loads the
   four table vectors into the fixed quad v16-v19 and ST4-stores lane C
   of that quad to *PTR.  C must be a compile-time constant ("i"
   constraint); v16-v19 are clobbered.  regsuffix is the full-vector
   arrangement for LD1, lnsuffix the element arrangement for the ST4
   lane form.  */
14292 #define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
14293 lnsuffix, funcsuffix, Q) \
14294 typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \
14295 __extension__ static __inline void \
14296 __attribute__ ((__always_inline__)) \
14297 vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
14298 intype b, const int c) \
14300 __ST4_LANE_STRUCTURE_##intype *__p = \
14301 (__ST4_LANE_STRUCTURE_##intype *)ptr; \
14302 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
14303 "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
14304 : "=Q"(*__p) \
14305 : "Q"(b), "i"(c) \
14306 : "v16", "v17", "v18", "v19"); \
/* Instantiate vst4_lane_<suffix> and vst4q_lane_<suffix> for every
   element type from the __ST4_LANE_FUNC template above.  */
14309 __ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
14310 __ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
14311 __ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
14312 __ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
14313 __ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
14314 __ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
14315 __ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
14316 __ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
14317 __ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
14318 __ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
14319 __ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
14320 __ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
14321 __ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
14322 __ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
14323 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
14324 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
14325 __ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
14326 __ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
14327 __ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
14328 __ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
14329 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
14330 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
14331 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
14332 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
14334 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14335 vaddlv_s32 (int32x2_t a)
14337 int64_t result;
14338 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14339 return result;
14342 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14343 vaddlv_u32 (uint32x2_t a)
14345 uint64_t result;
14346 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14347 return result;
14350 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14351 vpaddd_s64 (int64x2_t __a)
14353 return __builtin_aarch64_addpdi (__a);
14356 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14357 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14359 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
14362 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14363 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14365 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
14368 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14369 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14371 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
14374 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14375 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14377 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
14380 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14381 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14383 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
14386 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14387 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14389 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
14392 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14393 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14395 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
14398 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14399 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14401 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
14404 /* Table intrinsics. */
/* vqtbl1[q]: lookup in a single 16-byte table.  Each output byte i
   becomes a[b[i]]; TBL writes 0 for any index >= 16 (ISA-defined).
   Pure register operation, no clobbers.  */
14406 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14407 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
14409 poly8x8_t result;
14410 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14411 : "=w"(result)
14412 : "w"(a), "w"(b)
14413 : /* No clobbers */);
14414 return result;
14417 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14418 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
14420 int8x8_t result;
14421 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14422 : "=w"(result)
14423 : "w"(a), "w"(b)
14424 : /* No clobbers */);
14425 return result;
14428 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14429 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
14431 uint8x8_t result;
14432 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14433 : "=w"(result)
14434 : "w"(a), "w"(b)
14435 : /* No clobbers */);
14436 return result;
/* The q-forms take a 16-byte index vector and produce 16 result bytes;
   the table is still a single 16-byte register.  */
14439 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14440 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
14442 poly8x16_t result;
14443 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14444 : "=w"(result)
14445 : "w"(a), "w"(b)
14446 : /* No clobbers */);
14447 return result;
14450 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14451 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
14453 int8x16_t result;
14454 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14455 : "=w"(result)
14456 : "w"(a), "w"(b)
14457 : /* No clobbers */);
14458 return result;
14461 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14462 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
14464 uint8x16_t result;
14465 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14466 : "=w"(result)
14467 : "w"(a), "w"(b)
14468 : /* No clobbers */);
14469 return result;
/* vqtbl2[q]: lookup in a 32-byte table held as two 16-byte vectors.
   The aggregate is passed through memory (the "Q" input); LD1 loads it
   into the fixed pair v16/v17, which the two-register TBL then indexes
   as one unit -- hence the v16/v17 and "memory" clobbers.  Indexes
   >= 32 yield 0 (TBL semantics).  */
14472 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14473 vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
14475 int8x8_t result;
14476 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14477 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14478 :"=w"(result)
14479 :"Q"(tab),"w"(idx)
14480 :"memory", "v16", "v17");
14481 return result;
14484 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14485 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
14487 uint8x8_t result;
14488 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14489 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14490 :"=w"(result)
14491 :"Q"(tab),"w"(idx)
14492 :"memory", "v16", "v17");
14493 return result;
14496 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14497 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
14499 poly8x8_t result;
14500 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14501 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14502 :"=w"(result)
14503 :"Q"(tab),"w"(idx)
14504 :"memory", "v16", "v17");
14505 return result;
14508 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14509 vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
14511 int8x16_t result;
14512 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14513 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14514 :"=w"(result)
14515 :"Q"(tab),"w"(idx)
14516 :"memory", "v16", "v17");
14517 return result;
14520 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14521 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
14523 uint8x16_t result;
14524 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14525 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14526 :"=w"(result)
14527 :"Q"(tab),"w"(idx)
14528 :"memory", "v16", "v17");
14529 return result;
14532 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14533 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
14535 poly8x16_t result;
14536 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14537 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14538 :"=w"(result)
14539 :"Q"(tab),"w"(idx)
14540 :"memory", "v16", "v17");
14541 return result;
/* vqtbl3[q]: lookup in a 48-byte table of three 16-byte vectors.  Same
   pattern as vqtbl2 but the aggregate is LD1-loaded into v16-v18 and
   indexed by the three-register TBL; v16-v18 and memory clobbered.  */
14544 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14545 vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
14547 int8x8_t result;
14548 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14549 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14550 :"=w"(result)
14551 :"Q"(tab),"w"(idx)
14552 :"memory", "v16", "v17", "v18");
14553 return result;
14556 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14557 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
14559 uint8x8_t result;
14560 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14561 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14562 :"=w"(result)
14563 :"Q"(tab),"w"(idx)
14564 :"memory", "v16", "v17", "v18");
14565 return result;
14568 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14569 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
14571 poly8x8_t result;
14572 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14573 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14574 :"=w"(result)
14575 :"Q"(tab),"w"(idx)
14576 :"memory", "v16", "v17", "v18");
14577 return result;
14580 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14581 vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
14583 int8x16_t result;
14584 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14585 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14586 :"=w"(result)
14587 :"Q"(tab),"w"(idx)
14588 :"memory", "v16", "v17", "v18");
14589 return result;
14592 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14593 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
14595 uint8x16_t result;
14596 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14597 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14598 :"=w"(result)
14599 :"Q"(tab),"w"(idx)
14600 :"memory", "v16", "v17", "v18");
14601 return result;
14604 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14605 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
14607 poly8x16_t result;
14608 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14609 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14610 :"=w"(result)
14611 :"Q"(tab),"w"(idx)
14612 :"memory", "v16", "v17", "v18");
14613 return result;
/* vqtbl4[q]: lookup in a 64-byte table of four 16-byte vectors, loaded
   into v16-v19 and indexed by the four-register TBL; v16-v19 and
   memory clobbered.  */
14616 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14617 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
14619 int8x8_t result;
14620 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14621 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14622 :"=w"(result)
14623 :"Q"(tab),"w"(idx)
14624 :"memory", "v16", "v17", "v18", "v19");
14625 return result;
14628 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14629 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
14631 uint8x8_t result;
14632 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14633 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14634 :"=w"(result)
14635 :"Q"(tab),"w"(idx)
14636 :"memory", "v16", "v17", "v18", "v19");
14637 return result;
14640 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14641 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
14643 poly8x8_t result;
14644 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14645 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14646 :"=w"(result)
14647 :"Q"(tab),"w"(idx)
14648 :"memory", "v16", "v17", "v18", "v19");
14649 return result;
14653 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14654 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
14656 int8x16_t result;
14657 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14658 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14659 :"=w"(result)
14660 :"Q"(tab),"w"(idx)
14661 :"memory", "v16", "v17", "v18", "v19");
14662 return result;
14665 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14666 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
14668 uint8x16_t result;
14669 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14670 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14671 :"=w"(result)
14672 :"Q"(tab),"w"(idx)
14673 :"memory", "v16", "v17", "v18", "v19");
14674 return result;
14677 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14678 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
14680 poly8x16_t result;
14681 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14682 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14683 :"=w"(result)
14684 :"Q"(tab),"w"(idx)
14685 :"memory", "v16", "v17", "v18", "v19");
14686 return result;
/* vqtbx1[q]: extending table lookup.  Unlike TBL, TBX leaves a result
   lane unchanged when its index is out of range (>= 16 here), so the
   result is seeded from R and passed as a read-modify-write operand
   ("+w").  Single-register table, no clobbers.  */
14690 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14691 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
14693 int8x8_t result = r;
14694 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14695 : "+w"(result)
14696 : "w"(tab), "w"(idx)
14697 : /* No clobbers */);
14698 return result;
14701 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14702 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
14704 uint8x8_t result = r;
14705 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14706 : "+w"(result)
14707 : "w"(tab), "w"(idx)
14708 : /* No clobbers */);
14709 return result;
14712 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14713 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
14715 poly8x8_t result = r;
14716 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14717 : "+w"(result)
14718 : "w"(tab), "w"(idx)
14719 : /* No clobbers */);
14720 return result;
14723 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14724 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
14726 int8x16_t result = r;
14727 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14728 : "+w"(result)
14729 : "w"(tab), "w"(idx)
14730 : /* No clobbers */);
14731 return result;
14734 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14735 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
14737 uint8x16_t result = r;
14738 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14739 : "+w"(result)
14740 : "w"(tab), "w"(idx)
14741 : /* No clobbers */);
14742 return result;
14745 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14746 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
14748 poly8x16_t result = r;
14749 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14750 : "+w"(result)
14751 : "w"(tab), "w"(idx)
14752 : /* No clobbers */);
14753 return result;
/* vqtbx2[q]: extending lookup in a 32-byte two-vector table.  The
   aggregate is LD1-loaded into v16/v17 and indexed by the two-register
   TBX; lanes with index >= 32 keep the value seeded from R ("+w").
   Clobbers v16/v17 and memory.  */
14756 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14757 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
14759 int8x8_t result = r;
14760 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14761 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14762 :"+w"(result)
14763 :"Q"(tab),"w"(idx)
14764 :"memory", "v16", "v17");
14765 return result;
14768 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14769 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
14771 uint8x8_t result = r;
14772 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14773 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14774 :"+w"(result)
14775 :"Q"(tab),"w"(idx)
14776 :"memory", "v16", "v17");
14777 return result;
14780 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14781 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
14783 poly8x8_t result = r;
14784 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14785 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14786 :"+w"(result)
14787 :"Q"(tab),"w"(idx)
14788 :"memory", "v16", "v17");
14789 return result;
14793 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14794 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
14796 int8x16_t result = r;
14797 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14798 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14799 :"+w"(result)
14800 :"Q"(tab),"w"(idx)
14801 :"memory", "v16", "v17");
14802 return result;
14805 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14806 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
14808 uint8x16_t result = r;
14809 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14810 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14811 :"+w"(result)
14812 :"Q"(tab),"w"(idx)
14813 :"memory", "v16", "v17");
14814 return result;
14817 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14818 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
14820 poly8x16_t result = r;
14821 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14822 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14823 :"+w"(result)
14824 :"Q"(tab),"w"(idx)
14825 :"memory", "v16", "v17");
14826 return result;
/* vqtbx3[q]: extending lookup in a 48-byte three-vector table via
   v16-v18; index >= 48 keeps R's lane.  Clobbers v16-v18 and memory.  */
14830 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14831 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
14833 int8x8_t result = r;
14834 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14835 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14836 :"+w"(result)
14837 :"Q"(tab),"w"(idx)
14838 :"memory", "v16", "v17", "v18");
14839 return result;
14842 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14843 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
14845 uint8x8_t result = r;
14846 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14847 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14848 :"+w"(result)
14849 :"Q"(tab),"w"(idx)
14850 :"memory", "v16", "v17", "v18");
14851 return result;
14854 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14855 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
14857 poly8x8_t result = r;
14858 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14859 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14860 :"+w"(result)
14861 :"Q"(tab),"w"(idx)
14862 :"memory", "v16", "v17", "v18");
14863 return result;
14867 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14868 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
14870 int8x16_t result = r;
14871 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14872 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14873 :"+w"(result)
14874 :"Q"(tab),"w"(idx)
14875 :"memory", "v16", "v17", "v18");
14876 return result;
14879 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14880 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
14882 uint8x16_t result = r;
14883 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14884 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14885 :"+w"(result)
14886 :"Q"(tab),"w"(idx)
14887 :"memory", "v16", "v17", "v18");
14888 return result;
14891 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14892 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
14894 poly8x16_t result = r;
14895 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14896 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14897 :"+w"(result)
14898 :"Q"(tab),"w"(idx)
14899 :"memory", "v16", "v17", "v18");
14900 return result;
/* vqtbx4[q]: extending lookup in a 64-byte four-vector table via
   v16-v19; index >= 64 keeps R's lane.  Clobbers v16-v19 and memory.  */
14904 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14905 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
14907 int8x8_t result = r;
14908 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14909 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14910 :"+w"(result)
14911 :"Q"(tab),"w"(idx)
14912 :"memory", "v16", "v17", "v18", "v19");
14913 return result;
14916 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14917 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
14919 uint8x8_t result = r;
14920 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14921 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14922 :"+w"(result)
14923 :"Q"(tab),"w"(idx)
14924 :"memory", "v16", "v17", "v18", "v19");
14925 return result;
14928 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14929 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
14931 poly8x8_t result = r;
14932 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14933 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14934 :"+w"(result)
14935 :"Q"(tab),"w"(idx)
14936 :"memory", "v16", "v17", "v18", "v19");
14937 return result;
14941 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14942 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
14944 int8x16_t result = r;
14945 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14946 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14947 :"+w"(result)
14948 :"Q"(tab),"w"(idx)
14949 :"memory", "v16", "v17", "v18", "v19");
14950 return result;
14953 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14954 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
14956 uint8x16_t result = r;
14957 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14958 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14959 :"+w"(result)
14960 :"Q"(tab),"w"(idx)
14961 :"memory", "v16", "v17", "v18", "v19");
14962 return result;
14965 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14966 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
14968 poly8x16_t result = r;
14969 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14970 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14971 :"+w"(result)
14972 :"Q"(tab),"w"(idx)
14973 :"memory", "v16", "v17", "v18", "v19");
14974 return result;
14977 /* V7 legacy table intrinsics. */
/* vtbl1: ARMv7-compatible 8-byte-table lookup.  The 8-byte table is
   widened to 16 bytes by appending zeros (vcombine with a zero vector),
   so indexes 8-15 fetch 0 and indexes >= 16 also give 0 via TBL --
   together matching v7 VTBL1, which returns 0 for any index >= 8.  */
14979 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14980 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
14982 int8x8_t result;
14983 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
14984 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14985 : "=w"(result)
14986 : "w"(temp), "w"(idx)
14987 : /* No clobbers */);
14988 return result;
14991 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14992 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
14994 uint8x8_t result;
14995 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
14996 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14997 : "=w"(result)
14998 : "w"(temp), "w"(idx)
14999 : /* No clobbers */);
15000 return result;
15003 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15004 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
15006 poly8x8_t result;
15007 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15008 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15009 : "=w"(result)
15010 : "w"(temp), "w"(idx)
15011 : /* No clobbers */);
15012 return result;
/* vtbl2: the two 8-byte v7 table halves are concatenated into one
   16-byte register; TBL returns 0 for indexes >= 16, matching v7
   VTBL2 semantics.  No clobbers.  */
15015 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15016 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
15018 int8x8_t result;
15019 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
15020 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15021 : "=w"(result)
15022 : "w"(temp), "w"(idx)
15023 : /* No clobbers */);
15024 return result;
15027 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15028 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
15030 uint8x8_t result;
15031 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
15032 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15033 : "=w"(result)
15034 : "w"(temp), "w"(idx)
15035 : /* No clobbers */);
15036 return result;
15039 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15040 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
15042 poly8x8_t result;
15043 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
15044 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15045 : "=w"(result)
15046 : "w"(temp), "w"(idx)
15047 : /* No clobbers */);
15048 return result;
/* vtbl3: the three 8-byte v7 halves plus a zero half form a 32-byte
   two-register table.  The pair is spilled through memory ("Q" input)
   and LD1-loaded into v16/v17 for the two-register TBL.  Indexes 24-31
   hit the zero padding and >= 32 give 0 via TBL, matching v7 VTBL3's
   zero-for-index>=24 rule.  Clobbers v16/v17 and memory.  */
15051 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15052 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
15054 int8x8_t result;
15055 int8x16x2_t temp;
15056 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15057 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
15058 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15059 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15060 : "=w"(result)
15061 : "Q"(temp), "w"(idx)
15062 : "v16", "v17", "memory");
15063 return result;
15066 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15067 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
15069 uint8x8_t result;
15070 uint8x16x2_t temp;
15071 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15072 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
15073 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15074 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15075 : "=w"(result)
15076 : "Q"(temp), "w"(idx)
15077 : "v16", "v17", "memory");
15078 return result;
15081 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15082 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
15084 poly8x8_t result;
15085 poly8x16x2_t temp;
15086 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15087 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15088 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15089 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15090 : "=w"(result)
15091 : "Q"(temp), "w"(idx)
15092 : "v16", "v17", "memory");
15093 return result;
/* vtbl4: the four 8-byte v7 halves are packed into a 32-byte
   two-register table, loaded into v16/v17 and looked up with the
   two-register TBL (index >= 32 -> 0, matching v7 VTBL4).  Clobbers
   v16/v17 and memory.  */
15096 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15097 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
15099 int8x8_t result;
15100 int8x16x2_t temp;
15101 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15102 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
15103 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15104 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15105 : "=w"(result)
15106 : "Q"(temp), "w"(idx)
15107 : "v16", "v17", "memory");
15108 return result;
15111 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15112 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
15114 uint8x8_t result;
15115 uint8x16x2_t temp;
15116 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15117 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
15118 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15119 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15120 : "=w"(result)
15121 : "Q"(temp), "w"(idx)
15122 : "v16", "v17", "memory");
15123 return result;
15126 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15127 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
15129 poly8x8_t result;
15130 poly8x16x2_t temp;
15131 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15132 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15133 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15134 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15135 : "=w"(result)
15136 : "Q"(temp), "w"(idx)
15137 : "v16", "v17", "memory");
15138 return result;
/* vtbx2: extending lookup over the concatenated 16-byte table; lanes
   with index >= 16 keep the value seeded from R ("+w"), matching v7
   VTBX2.  No clobbers.  */
15141 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15142 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
15144 int8x8_t result = r;
15145 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
15146 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15147 : "+w"(result)
15148 : "w"(temp), "w"(idx)
15149 : /* No clobbers */);
15150 return result;
15153 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15154 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
15156 uint8x8_t result = r;
15157 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
15158 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15159 : "+w"(result)
15160 : "w"(temp), "w"(idx)
15161 : /* No clobbers */);
15162 return result;
15165 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15166 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
15168 poly8x8_t result = r;
15169 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
15170 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15171 : "+w"(result)
15172 : "w"(temp), "w"(idx)
15173 : /* No clobbers */);
15174 return result;
/* vtbx4: extending lookup over the 32-byte combined table via v16/v17;
   lanes with index >= 32 keep R's value, matching v7 VTBX4.  Clobbers
   v16/v17 and memory.  */
15177 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15178 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
15180 int8x8_t result = r;
15181 int8x16x2_t temp;
15182 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15183 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
15184 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15185 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15186 : "+w"(result)
15187 : "Q"(temp), "w"(idx)
15188 : "v16", "v17", "memory");
15189 return result;
15192 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15193 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
15195 uint8x8_t result = r;
15196 uint8x16x2_t temp;
15197 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15198 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
15199 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15200 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15201 : "+w"(result)
15202 : "Q"(temp), "w"(idx)
15203 : "v16", "v17", "memory");
15204 return result;
15207 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15208 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
15210 poly8x8_t result = r;
15211 poly8x16x2_t temp;
15212 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15213 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15214 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15215 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15216 : "+w"(result)
15217 : "Q"(temp), "w"(idx)
15218 : "v16", "v17", "memory");
15219 return result;
15222 /* End of temporary inline asm. */
15224 /* Start of optimal implementations in approved order. */
15226 /* vabs */
15228 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15229 vabs_f32 (float32x2_t __a)
15231 return __builtin_aarch64_absv2sf (__a);
15234 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15235 vabs_f64 (float64x1_t __a)
15237 return __builtin_fabs (__a);
15240 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15241 vabs_s8 (int8x8_t __a)
15243 return __builtin_aarch64_absv8qi (__a);
15246 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15247 vabs_s16 (int16x4_t __a)
15249 return __builtin_aarch64_absv4hi (__a);
15252 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15253 vabs_s32 (int32x2_t __a)
15255 return __builtin_aarch64_absv2si (__a);
15258 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15259 vabs_s64 (int64x1_t __a)
15261 return __builtin_llabs (__a);
15264 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15265 vabsq_f32 (float32x4_t __a)
15267 return __builtin_aarch64_absv4sf (__a);
15270 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15271 vabsq_f64 (float64x2_t __a)
15273 return __builtin_aarch64_absv2df (__a);
15276 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15277 vabsq_s8 (int8x16_t __a)
15279 return __builtin_aarch64_absv16qi (__a);
15282 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15283 vabsq_s16 (int16x8_t __a)
15285 return __builtin_aarch64_absv8hi (__a);
15288 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15289 vabsq_s32 (int32x4_t __a)
15291 return __builtin_aarch64_absv4si (__a);
15294 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15295 vabsq_s64 (int64x2_t __a)
15297 return __builtin_aarch64_absv2di (__a);
15300 /* vadd */
15302 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15303 vaddd_s64 (int64x1_t __a, int64x1_t __b)
15305 return __a + __b;
15308 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15309 vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
15311 return __a + __b;
15314 /* vaddv */
15316 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15317 vaddv_s8 (int8x8_t __a)
15319 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
15322 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15323 vaddv_s16 (int16x4_t __a)
15325 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
15328 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15329 vaddv_s32 (int32x2_t __a)
15331 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
15334 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15335 vaddv_u8 (uint8x8_t __a)
15337 return vget_lane_u8 ((uint8x8_t)
15338 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
15342 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15343 vaddv_u16 (uint16x4_t __a)
15345 return vget_lane_u16 ((uint16x4_t)
15346 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
15350 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15351 vaddv_u32 (uint32x2_t __a)
15353 return vget_lane_u32 ((uint32x2_t)
15354 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
15358 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15359 vaddvq_s8 (int8x16_t __a)
15361 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
15365 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15366 vaddvq_s16 (int16x8_t __a)
15368 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
15371 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15372 vaddvq_s32 (int32x4_t __a)
15374 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
15377 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15378 vaddvq_s64 (int64x2_t __a)
15380 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
15383 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15384 vaddvq_u8 (uint8x16_t __a)
15386 return vgetq_lane_u8 ((uint8x16_t)
15387 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
15391 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15392 vaddvq_u16 (uint16x8_t __a)
15394 return vgetq_lane_u16 ((uint16x8_t)
15395 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
15399 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15400 vaddvq_u32 (uint32x4_t __a)
15402 return vgetq_lane_u32 ((uint32x4_t)
15403 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
15407 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15408 vaddvq_u64 (uint64x2_t __a)
15410 return vgetq_lane_u64 ((uint64x2_t)
15411 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
15415 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15416 vaddv_f32 (float32x2_t __a)
15418 float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
15419 return vget_lane_f32 (__t, 0);
15422 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15423 vaddvq_f32 (float32x4_t __a)
15425 float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
15426 return vgetq_lane_f32 (__t, 0);
15429 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15430 vaddvq_f64 (float64x2_t __a)
15432 float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
15433 return vgetq_lane_f64 (__t, 0);
15436 /* vbsl */
15438 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15439 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
15441 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
15444 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15445 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
15447 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
15450 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15451 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
15453 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
15456 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15457 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
15459 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
15462 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15463 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
15465 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
15468 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15469 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
15471 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
15474 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15475 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
15477 return __builtin_aarch64_simd_bsldi_suss (__a, __b, __c);
15480 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15481 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
15483 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
15486 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15487 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
15489 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
15492 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15493 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
15495 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
15498 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15499 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
15501 return __builtin_aarch64_simd_bsldi_uuuu (__a, __b, __c);
15504 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15505 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
15507 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
15510 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15511 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
15513 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
15516 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15517 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
15519 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
15522 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15523 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
15525 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
15528 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15529 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
15531 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
15534 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15535 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
15537 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
15540 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15541 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
15543 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
15546 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15547 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
15549 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
15552 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15553 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
15555 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
15558 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15559 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
15561 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
15564 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15565 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
15567 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
15570 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15571 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
15573 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* vaes */

/* AES single-round encrypt/decrypt and (inverse) mix-columns,
   available only when the crypto extension is enabled.  */

static __inline uint8x16_t
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
}

static __inline uint8x16_t
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
}

static __inline uint8x16_t
vaesmcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
}

static __inline uint8x16_t
vaesimcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
}
#endif
15606 /* vcage */
15608 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15609 vcages_f32 (float32_t __a, float32_t __b)
15611 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
15614 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15615 vcage_f32 (float32x2_t __a, float32x2_t __b)
15617 return vabs_f32 (__a) >= vabs_f32 (__b);
15620 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15621 vcageq_f32 (float32x4_t __a, float32x4_t __b)
15623 return vabsq_f32 (__a) >= vabsq_f32 (__b);
15626 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15627 vcaged_f64 (float64_t __a, float64_t __b)
15629 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
15632 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15633 vcageq_f64 (float64x2_t __a, float64x2_t __b)
15635 return vabsq_f64 (__a) >= vabsq_f64 (__b);
15638 /* vcagt */
15640 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15641 vcagts_f32 (float32_t __a, float32_t __b)
15643 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
15646 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15647 vcagt_f32 (float32x2_t __a, float32x2_t __b)
15649 return vabs_f32 (__a) > vabs_f32 (__b);
15652 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15653 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
15655 return vabsq_f32 (__a) > vabsq_f32 (__b);
15658 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15659 vcagtd_f64 (float64_t __a, float64_t __b)
15661 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
15664 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15665 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
15667 return vabsq_f64 (__a) > vabsq_f64 (__b);
15670 /* vcale */
15672 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15673 vcale_f32 (float32x2_t __a, float32x2_t __b)
15675 return vabs_f32 (__a) <= vabs_f32 (__b);
15678 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15679 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
15681 return vabsq_f32 (__a) <= vabsq_f32 (__b);
15684 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15685 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
15687 return vabsq_f64 (__a) <= vabsq_f64 (__b);
15690 /* vcalt */
15692 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15693 vcalt_f32 (float32x2_t __a, float32x2_t __b)
15695 return vabs_f32 (__a) < vabs_f32 (__b);
15698 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15699 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
15701 return vabsq_f32 (__a) < vabsq_f32 (__b);
15704 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15705 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
15707 return vabsq_f64 (__a) < vabsq_f64 (__b);
15710 /* vceq - vector. */
15712 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15713 vceq_f32 (float32x2_t __a, float32x2_t __b)
15715 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
15718 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15719 vceq_f64 (float64x1_t __a, float64x1_t __b)
15721 return __a == __b ? -1ll : 0ll;
15724 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15725 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
15727 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15728 (int8x8_t) __b);
15731 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15732 vceq_s8 (int8x8_t __a, int8x8_t __b)
15734 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
15737 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15738 vceq_s16 (int16x4_t __a, int16x4_t __b)
15740 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
15743 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15744 vceq_s32 (int32x2_t __a, int32x2_t __b)
15746 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
15749 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15750 vceq_s64 (int64x1_t __a, int64x1_t __b)
15752 return __a == __b ? -1ll : 0ll;
15755 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15756 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
15758 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15759 (int8x8_t) __b);
15762 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15763 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
15765 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
15766 (int16x4_t) __b);
15769 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15770 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
15772 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
15773 (int32x2_t) __b);
15776 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15777 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
15779 return __a == __b ? -1ll : 0ll;
15782 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15783 vceqq_f32 (float32x4_t __a, float32x4_t __b)
15785 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
15788 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15789 vceqq_f64 (float64x2_t __a, float64x2_t __b)
15791 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
15794 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15795 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
15797 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15798 (int8x16_t) __b);
15801 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15802 vceqq_s8 (int8x16_t __a, int8x16_t __b)
15804 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
15807 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15808 vceqq_s16 (int16x8_t __a, int16x8_t __b)
15810 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
15813 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15814 vceqq_s32 (int32x4_t __a, int32x4_t __b)
15816 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
15819 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15820 vceqq_s64 (int64x2_t __a, int64x2_t __b)
15822 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
15825 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15826 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
15828 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15829 (int8x16_t) __b);
15832 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15833 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
15835 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
15836 (int16x8_t) __b);
15839 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15840 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
15842 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
15843 (int32x4_t) __b);
15846 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15847 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
15849 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
15850 (int64x2_t) __b);
15853 /* vceq - scalar. */
15855 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15856 vceqs_f32 (float32_t __a, float32_t __b)
15858 return __a == __b ? -1 : 0;
15861 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15862 vceqd_s64 (int64x1_t __a, int64x1_t __b)
15864 return __a == __b ? -1ll : 0ll;
15867 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15868 vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
15870 return __a == __b ? -1ll : 0ll;
15873 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15874 vceqd_f64 (float64_t __a, float64_t __b)
15876 return __a == __b ? -1ll : 0ll;
15879 /* vceqz - vector. */
15881 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15882 vceqz_f32 (float32x2_t __a)
15884 float32x2_t __b = {0.0f, 0.0f};
15885 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
15888 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15889 vceqz_f64 (float64x1_t __a)
15891 return __a == 0.0 ? -1ll : 0ll;
15894 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15895 vceqz_p8 (poly8x8_t __a)
15897 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15898 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15899 (int8x8_t) __b);
15902 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15903 vceqz_s8 (int8x8_t __a)
15905 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15906 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
15909 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15910 vceqz_s16 (int16x4_t __a)
15912 int16x4_t __b = {0, 0, 0, 0};
15913 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
15916 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15917 vceqz_s32 (int32x2_t __a)
15919 int32x2_t __b = {0, 0};
15920 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
15923 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15924 vceqz_s64 (int64x1_t __a)
15926 return __a == 0ll ? -1ll : 0ll;
15929 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15930 vceqz_u8 (uint8x8_t __a)
15932 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15933 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15934 (int8x8_t) __b);
15937 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15938 vceqz_u16 (uint16x4_t __a)
15940 uint16x4_t __b = {0, 0, 0, 0};
15941 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
15942 (int16x4_t) __b);
15945 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15946 vceqz_u32 (uint32x2_t __a)
15948 uint32x2_t __b = {0, 0};
15949 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
15950 (int32x2_t) __b);
15953 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15954 vceqz_u64 (uint64x1_t __a)
15956 return __a == 0ll ? -1ll : 0ll;
15959 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15960 vceqzq_f32 (float32x4_t __a)
15962 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15963 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
15966 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15967 vceqzq_f64 (float64x2_t __a)
15969 float64x2_t __b = {0.0, 0.0};
15970 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
15973 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15974 vceqzq_p8 (poly8x16_t __a)
15976 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15977 0, 0, 0, 0, 0, 0, 0, 0};
15978 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15979 (int8x16_t) __b);
15982 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15983 vceqzq_s8 (int8x16_t __a)
15985 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15986 0, 0, 0, 0, 0, 0, 0, 0};
15987 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
15990 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15991 vceqzq_s16 (int16x8_t __a)
15993 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15994 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
15997 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15998 vceqzq_s32 (int32x4_t __a)
16000 int32x4_t __b = {0, 0, 0, 0};
16001 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
16004 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16005 vceqzq_s64 (int64x2_t __a)
16007 int64x2_t __b = {0, 0};
16008 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
16011 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16012 vceqzq_u8 (uint8x16_t __a)
16014 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16015 0, 0, 0, 0, 0, 0, 0, 0};
16016 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
16017 (int8x16_t) __b);
16020 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16021 vceqzq_u16 (uint16x8_t __a)
16023 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16024 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
16025 (int16x8_t) __b);
16028 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16029 vceqzq_u32 (uint32x4_t __a)
16031 uint32x4_t __b = {0, 0, 0, 0};
16032 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
16033 (int32x4_t) __b);
16036 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16037 vceqzq_u64 (uint64x2_t __a)
16039 uint64x2_t __b = {0, 0};
16040 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
16041 (int64x2_t) __b);
16044 /* vceqz - scalar. */
16046 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16047 vceqzs_f32 (float32_t __a)
16049 return __a == 0.0f ? -1 : 0;
16052 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16053 vceqzd_s64 (int64x1_t __a)
16055 return __a == 0 ? -1ll : 0ll;
16058 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16059 vceqzd_u64 (int64x1_t __a)
16061 return __a == 0 ? -1ll : 0ll;
16064 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16065 vceqzd_f64 (float64_t __a)
16067 return __a == 0.0 ? -1ll : 0ll;
16070 /* vcge - vector. */
16072 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16073 vcge_f32 (float32x2_t __a, float32x2_t __b)
16075 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
16078 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16079 vcge_f64 (float64x1_t __a, float64x1_t __b)
16081 return __a >= __b ? -1ll : 0ll;
16084 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16085 vcge_p8 (poly8x8_t __a, poly8x8_t __b)
16087 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
16088 (int8x8_t) __b);
16091 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16092 vcge_s8 (int8x8_t __a, int8x8_t __b)
16094 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
16097 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16098 vcge_s16 (int16x4_t __a, int16x4_t __b)
16100 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
16103 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16104 vcge_s32 (int32x2_t __a, int32x2_t __b)
16106 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
16109 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16110 vcge_s64 (int64x1_t __a, int64x1_t __b)
16112 return __a >= __b ? -1ll : 0ll;
16115 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16116 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
16118 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
16119 (int8x8_t) __b);
16122 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16123 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
16125 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16126 (int16x4_t) __b);
16129 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16130 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
16132 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16133 (int32x2_t) __b);
16136 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16137 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
16139 return __a >= __b ? -1ll : 0ll;
16142 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16143 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
16145 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16148 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16149 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
16151 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16154 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16155 vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
16157 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16158 (int8x16_t) __b);
16161 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16162 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
16164 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16167 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16168 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
16170 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16173 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16174 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
16176 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16179 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16180 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
16182 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16185 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16186 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
16188 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16189 (int8x16_t) __b);
16192 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16193 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
16195 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16196 (int16x8_t) __b);
16199 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16200 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
16202 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16203 (int32x4_t) __b);
16206 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16207 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
16209 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16210 (int64x2_t) __b);
16213 /* vcge - scalar. */
16215 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16216 vcges_f32 (float32_t __a, float32_t __b)
16218 return __a >= __b ? -1 : 0;
16221 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16222 vcged_s64 (int64x1_t __a, int64x1_t __b)
16224 return __a >= __b ? -1ll : 0ll;
16227 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16228 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
16230 return __a >= __b ? -1ll : 0ll;
16233 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16234 vcged_f64 (float64_t __a, float64_t __b)
16236 return __a >= __b ? -1ll : 0ll;
16239 /* vcgez - vector. */
16241 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16242 vcgez_f32 (float32x2_t __a)
16244 float32x2_t __b = {0.0f, 0.0f};
16245 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
16248 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16249 vcgez_f64 (float64x1_t __a)
16251 return __a >= 0.0 ? -1ll : 0ll;
16254 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16255 vcgez_p8 (poly8x8_t __a)
16257 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16258 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
16259 (int8x8_t) __b);
16262 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16263 vcgez_s8 (int8x8_t __a)
16265 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16266 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
16269 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16270 vcgez_s16 (int16x4_t __a)
16272 int16x4_t __b = {0, 0, 0, 0};
16273 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
16276 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16277 vcgez_s32 (int32x2_t __a)
16279 int32x2_t __b = {0, 0};
16280 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
16283 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16284 vcgez_s64 (int64x1_t __a)
16286 return __a >= 0ll ? -1ll : 0ll;
16289 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16290 vcgez_u8 (uint8x8_t __a)
16292 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16293 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
16294 (int8x8_t) __b);
16297 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16298 vcgez_u16 (uint16x4_t __a)
16300 uint16x4_t __b = {0, 0, 0, 0};
16301 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16302 (int16x4_t) __b);
16305 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16306 vcgez_u32 (uint32x2_t __a)
16308 uint32x2_t __b = {0, 0};
16309 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16310 (int32x2_t) __b);
16313 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16314 vcgez_u64 (uint64x1_t __a)
16316 return __a >= 0ll ? -1ll : 0ll;
16319 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16320 vcgezq_f32 (float32x4_t __a)
16322 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
16323 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16326 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16327 vcgezq_f64 (float64x2_t __a)
16329 float64x2_t __b = {0.0, 0.0};
16330 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16333 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16334 vcgezq_p8 (poly8x16_t __a)
16336 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16337 0, 0, 0, 0, 0, 0, 0, 0};
16338 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16339 (int8x16_t) __b);
16342 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16343 vcgezq_s8 (int8x16_t __a)
16345 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16346 0, 0, 0, 0, 0, 0, 0, 0};
16347 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16350 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16351 vcgezq_s16 (int16x8_t __a)
16353 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16354 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16357 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16358 vcgezq_s32 (int32x4_t __a)
16360 int32x4_t __b = {0, 0, 0, 0};
16361 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16364 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16365 vcgezq_s64 (int64x2_t __a)
16367 int64x2_t __b = {0, 0};
16368 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16371 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16372 vcgezq_u8 (uint8x16_t __a)
16374 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16375 0, 0, 0, 0, 0, 0, 0, 0};
16376 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16377 (int8x16_t) __b);
16380 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16381 vcgezq_u16 (uint16x8_t __a)
16383 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16384 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16385 (int16x8_t) __b);
16388 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16389 vcgezq_u32 (uint32x4_t __a)
16391 uint32x4_t __b = {0, 0, 0, 0};
16392 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16393 (int32x4_t) __b);
16396 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16397 vcgezq_u64 (uint64x2_t __a)
16399 uint64x2_t __b = {0, 0};
16400 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16401 (int64x2_t) __b);
16404 /* vcgez - scalar. */
16406 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16407 vcgezs_f32 (float32_t __a)
16409 return __a >= 0.0f ? -1 : 0;
16412 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16413 vcgezd_s64 (int64x1_t __a)
16415 return __a >= 0 ? -1ll : 0ll;
16418 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16419 vcgezd_u64 (int64x1_t __a)
16421 return __a >= 0 ? -1ll : 0ll;
16424 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16425 vcgezd_f64 (float64_t __a)
16427 return __a >= 0.0 ? -1ll : 0ll;
16430 /* vcgt - vector. */
16432 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16433 vcgt_f32 (float32x2_t __a, float32x2_t __b)
16435 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
16438 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16439 vcgt_f64 (float64x1_t __a, float64x1_t __b)
16441 return __a > __b ? -1ll : 0ll;
16444 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16445 vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
16447 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
16448 (int8x8_t) __b);
16451 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16452 vcgt_s8 (int8x8_t __a, int8x8_t __b)
16454 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
16457 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16458 vcgt_s16 (int16x4_t __a, int16x4_t __b)
16460 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
16463 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16464 vcgt_s32 (int32x2_t __a, int32x2_t __b)
16466 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
16469 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16470 vcgt_s64 (int64x1_t __a, int64x1_t __b)
16472 return __a > __b ? -1ll : 0ll;
16475 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16476 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
16478 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
16479 (int8x8_t) __b);
16482 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16483 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
16485 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
16486 (int16x4_t) __b);
16489 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16490 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
16492 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
16493 (int32x2_t) __b);
16496 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16497 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
16499 return __a > __b ? -1ll : 0ll;
16502 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16503 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
16505 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
16508 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16509 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
16511 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
16514 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16515 vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
16517 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
16518 (int8x16_t) __b);
16521 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16522 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
16524 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
16527 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16528 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
16530 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
16533 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16534 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
16536 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
16539 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16540 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
16542 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
16545 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16546 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
16548 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
16549 (int8x16_t) __b);
16552 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16553 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
16555 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
16556 (int16x8_t) __b);
16559 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16560 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
16562 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
16563 (int32x4_t) __b);
16566 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16567 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
16569 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
16570 (int64x2_t) __b);
16573 /* vcgt - scalar. */
16575 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16576 vcgts_f32 (float32_t __a, float32_t __b)
16578 return __a > __b ? -1 : 0;
16581 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16582 vcgtd_s64 (int64x1_t __a, int64x1_t __b)
16584 return __a > __b ? -1ll : 0ll;
16587 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16588 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
16590 return __a > __b ? -1ll : 0ll;
16593 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16594 vcgtd_f64 (float64_t __a, float64_t __b)
16596 return __a > __b ? -1ll : 0ll;
16599 /* vcgtz - vector. */
16601 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16602 vcgtz_f32 (float32x2_t __a)
16604 float32x2_t __b = {0.0f, 0.0f};
16605 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
16608 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16609 vcgtz_f64 (float64x1_t __a)
16611 return __a > 0.0 ? -1ll : 0ll;
16614 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16615 vcgtz_p8 (poly8x8_t __a)
16617 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16618 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
16619 (int8x8_t) __b);
16622 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16623 vcgtz_s8 (int8x8_t __a)
16625 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16626 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
16629 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16630 vcgtz_s16 (int16x4_t __a)
16632 int16x4_t __b = {0, 0, 0, 0};
16633 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
16636 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16637 vcgtz_s32 (int32x2_t __a)
16639 int32x2_t __b = {0, 0};
16640 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
16643 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16644 vcgtz_s64 (int64x1_t __a)
16646 return __a > 0ll ? -1ll : 0ll;
16649 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16650 vcgtz_u8 (uint8x8_t __a)
16652 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16653 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
16654 (int8x8_t) __b);
16657 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16658 vcgtz_u16 (uint16x4_t __a)
16660 uint16x4_t __b = {0, 0, 0, 0};
16661 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
16662 (int16x4_t) __b);
16665 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16666 vcgtz_u32 (uint32x2_t __a)
16668 uint32x2_t __b = {0, 0};
16669 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
16670 (int32x2_t) __b);
16673 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16674 vcgtz_u64 (uint64x1_t __a)
16676 return __a > 0ll ? -1ll : 0ll;
16679 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16680 vcgtzq_f32 (float32x4_t __a)
16682 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
16683 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
16686 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16687 vcgtzq_f64 (float64x2_t __a)
16689 float64x2_t __b = {0.0, 0.0};
16690 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
16693 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16694 vcgtzq_p8 (poly8x16_t __a)
16696 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16697 0, 0, 0, 0, 0, 0, 0, 0};
16698 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
16699 (int8x16_t) __b);
16702 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16703 vcgtzq_s8 (int8x16_t __a)
16705 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16706 0, 0, 0, 0, 0, 0, 0, 0};
16707 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
16710 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16711 vcgtzq_s16 (int16x8_t __a)
16713 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16714 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
16717 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16718 vcgtzq_s32 (int32x4_t __a)
16720 int32x4_t __b = {0, 0, 0, 0};
16721 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
16724 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16725 vcgtzq_s64 (int64x2_t __a)
16727 int64x2_t __b = {0, 0};
16728 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
16731 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16732 vcgtzq_u8 (uint8x16_t __a)
16734 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16735 0, 0, 0, 0, 0, 0, 0, 0};
16736 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
16737 (int8x16_t) __b);
16740 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16741 vcgtzq_u16 (uint16x8_t __a)
16743 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16744 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
16745 (int16x8_t) __b);
16748 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16749 vcgtzq_u32 (uint32x4_t __a)
16751 uint32x4_t __b = {0, 0, 0, 0};
16752 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
16753 (int32x4_t) __b);
16756 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16757 vcgtzq_u64 (uint64x2_t __a)
16759 uint64x2_t __b = {0, 0};
16760 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
16761 (int64x2_t) __b);
16764 /* vcgtz - scalar. */
16766 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16767 vcgtzs_f32 (float32_t __a)
16769 return __a > 0.0f ? -1 : 0;
16772 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16773 vcgtzd_s64 (int64x1_t __a)
16775 return __a > 0 ? -1ll : 0ll;
16778 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16779 vcgtzd_u64 (int64x1_t __a)
16781 return __a > 0 ? -1ll : 0ll;
16784 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16785 vcgtzd_f64 (float64_t __a)
16787 return __a > 0.0 ? -1ll : 0ll;
16790 /* vcle - vector. */
16792 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16793 vcle_f32 (float32x2_t __a, float32x2_t __b)
16795 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
16798 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16799 vcle_f64 (float64x1_t __a, float64x1_t __b)
16801 return __a <= __b ? -1ll : 0ll;
16804 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16805 vcle_p8 (poly8x8_t __a, poly8x8_t __b)
16807 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
16808 (int8x8_t) __a);
16811 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16812 vcle_s8 (int8x8_t __a, int8x8_t __b)
16814 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
16817 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16818 vcle_s16 (int16x4_t __a, int16x4_t __b)
16820 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
16823 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16824 vcle_s32 (int32x2_t __a, int32x2_t __b)
16826 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
16829 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16830 vcle_s64 (int64x1_t __a, int64x1_t __b)
16832 return __a <= __b ? -1ll : 0ll;
16835 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16836 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
16838 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
16839 (int8x8_t) __a);
16842 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16843 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
16845 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
16846 (int16x4_t) __a);
16849 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16850 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
16852 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
16853 (int32x2_t) __a);
16856 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16857 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
16859 return __a <= __b ? -1ll : 0ll;
16862 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16863 vcleq_f32 (float32x4_t __a, float32x4_t __b)
16865 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
16868 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16869 vcleq_f64 (float64x2_t __a, float64x2_t __b)
16871 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
16874 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16875 vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
16877 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
16878 (int8x16_t) __a);
16881 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16882 vcleq_s8 (int8x16_t __a, int8x16_t __b)
16884 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
16887 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16888 vcleq_s16 (int16x8_t __a, int16x8_t __b)
16890 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
16893 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16894 vcleq_s32 (int32x4_t __a, int32x4_t __b)
16896 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
16899 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16900 vcleq_s64 (int64x2_t __a, int64x2_t __b)
16902 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
16905 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16906 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
16908 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
16909 (int8x16_t) __a);
16912 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16913 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
16915 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
16916 (int16x8_t) __a);
16919 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16920 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
16922 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
16923 (int32x4_t) __a);
16926 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16927 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
16929 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
16930 (int64x2_t) __a);
16933 /* vcle - scalar. */
16935 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16936 vcles_f32 (float32_t __a, float32_t __b)
16938 return __a <= __b ? -1 : 0;
16941 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16942 vcled_s64 (int64x1_t __a, int64x1_t __b)
16944 return __a <= __b ? -1ll : 0ll;
16947 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16948 vcled_u64 (uint64x1_t __a, uint64x1_t __b)
16950 return __a <= __b ? -1ll : 0ll;
16953 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16954 vcled_f64 (float64_t __a, float64_t __b)
16956 return __a <= __b ? -1ll : 0ll;
16959 /* vclez - vector. */
16961 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16962 vclez_f32 (float32x2_t __a)
16964 float32x2_t __b = {0.0f, 0.0f};
16965 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
16968 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16969 vclez_f64 (float64x1_t __a)
16971 return __a <= 0.0 ? -1ll : 0ll;
16974 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16975 vclez_p8 (poly8x8_t __a)
16977 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16978 return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
16979 (int8x8_t) __b);
16982 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16983 vclez_s8 (int8x8_t __a)
16985 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16986 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
16989 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16990 vclez_s16 (int16x4_t __a)
16992 int16x4_t __b = {0, 0, 0, 0};
16993 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
16996 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16997 vclez_s32 (int32x2_t __a)
16999 int32x2_t __b = {0, 0};
17000 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
17003 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17004 vclez_s64 (int64x1_t __a)
17006 return __a <= 0ll ? -1ll : 0ll;
17009 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17010 vclez_u64 (uint64x1_t __a)
17012 return __a <= 0ll ? -1ll : 0ll;
17015 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17016 vclezq_f32 (float32x4_t __a)
17018 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17019 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
17022 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17023 vclezq_f64 (float64x2_t __a)
17025 float64x2_t __b = {0.0, 0.0};
17026 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
17029 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17030 vclezq_p8 (poly8x16_t __a)
17032 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17033 0, 0, 0, 0, 0, 0, 0, 0};
17034 return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
17035 (int8x16_t) __b);
17038 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17039 vclezq_s8 (int8x16_t __a)
17041 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17042 0, 0, 0, 0, 0, 0, 0, 0};
17043 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
17046 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17047 vclezq_s16 (int16x8_t __a)
17049 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17050 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
17053 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17054 vclezq_s32 (int32x4_t __a)
17056 int32x4_t __b = {0, 0, 0, 0};
17057 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
17060 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17061 vclezq_s64 (int64x2_t __a)
17063 int64x2_t __b = {0, 0};
17064 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
17067 /* vclez - scalar. */
17069 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17070 vclezs_f32 (float32_t __a)
17072 return __a <= 0.0f ? -1 : 0;
17075 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17076 vclezd_s64 (int64x1_t __a)
17078 return __a <= 0 ? -1ll : 0ll;
17081 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17082 vclezd_u64 (int64x1_t __a)
17084 return __a <= 0 ? -1ll : 0ll;
17087 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17088 vclezd_f64 (float64_t __a)
17090 return __a <= 0.0 ? -1ll : 0ll;
17093 /* vclt - vector. */
17095 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17096 vclt_f32 (float32x2_t __a, float32x2_t __b)
17098 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
17101 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17102 vclt_f64 (float64x1_t __a, float64x1_t __b)
17104 return __a < __b ? -1ll : 0ll;
17107 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17108 vclt_p8 (poly8x8_t __a, poly8x8_t __b)
17110 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
17111 (int8x8_t) __a);
17114 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17115 vclt_s8 (int8x8_t __a, int8x8_t __b)
17117 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
17120 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17121 vclt_s16 (int16x4_t __a, int16x4_t __b)
17123 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
17126 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17127 vclt_s32 (int32x2_t __a, int32x2_t __b)
17129 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
17132 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17133 vclt_s64 (int64x1_t __a, int64x1_t __b)
17135 return __a < __b ? -1ll : 0ll;
17138 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17139 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
17141 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
17142 (int8x8_t) __a);
17145 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17146 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
17148 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
17149 (int16x4_t) __a);
17152 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17153 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
17155 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
17156 (int32x2_t) __a);
17159 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17160 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
17162 return __a < __b ? -1ll : 0ll;
17165 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17166 vcltq_f32 (float32x4_t __a, float32x4_t __b)
17168 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
17171 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17172 vcltq_f64 (float64x2_t __a, float64x2_t __b)
17174 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
17177 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17178 vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
17180 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
17181 (int8x16_t) __a);
17184 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17185 vcltq_s8 (int8x16_t __a, int8x16_t __b)
17187 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
17190 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17191 vcltq_s16 (int16x8_t __a, int16x8_t __b)
17193 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
17196 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17197 vcltq_s32 (int32x4_t __a, int32x4_t __b)
17199 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
17202 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17203 vcltq_s64 (int64x2_t __a, int64x2_t __b)
17205 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
17208 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17209 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
17211 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
17212 (int8x16_t) __a);
17215 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17216 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
17218 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
17219 (int16x8_t) __a);
17222 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17223 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
17225 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
17226 (int32x4_t) __a);
17229 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17230 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
17232 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
17233 (int64x2_t) __a);
17236 /* vclt - scalar. */
17238 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17239 vclts_f32 (float32_t __a, float32_t __b)
17241 return __a < __b ? -1 : 0;
17244 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17245 vcltd_s64 (int64x1_t __a, int64x1_t __b)
17247 return __a < __b ? -1ll : 0ll;
17250 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17251 vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
17253 return __a < __b ? -1ll : 0ll;
17256 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17257 vcltd_f64 (float64_t __a, float64_t __b)
17259 return __a < __b ? -1ll : 0ll;
17262 /* vcltz - vector. */
17264 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17265 vcltz_f32 (float32x2_t __a)
17267 float32x2_t __b = {0.0f, 0.0f};
17268 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
17271 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17272 vcltz_f64 (float64x1_t __a)
17274 return __a < 0.0 ? -1ll : 0ll;
17277 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17278 vcltz_p8 (poly8x8_t __a)
17280 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17281 return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
17282 (int8x8_t) __b);
17285 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17286 vcltz_s8 (int8x8_t __a)
17288 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17289 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
17292 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17293 vcltz_s16 (int16x4_t __a)
17295 int16x4_t __b = {0, 0, 0, 0};
17296 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
17299 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17300 vcltz_s32 (int32x2_t __a)
17302 int32x2_t __b = {0, 0};
17303 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
17306 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17307 vcltz_s64 (int64x1_t __a)
17309 return __a < 0ll ? -1ll : 0ll;
17312 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17313 vcltzq_f32 (float32x4_t __a)
17315 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17316 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
17319 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17320 vcltzq_f64 (float64x2_t __a)
17322 float64x2_t __b = {0.0, 0.0};
17323 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
17326 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17327 vcltzq_p8 (poly8x16_t __a)
17329 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17330 0, 0, 0, 0, 0, 0, 0, 0};
17331 return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
17332 (int8x16_t) __b);
17335 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17336 vcltzq_s8 (int8x16_t __a)
17338 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17339 0, 0, 0, 0, 0, 0, 0, 0};
17340 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
17343 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17344 vcltzq_s16 (int16x8_t __a)
17346 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17347 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
17350 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17351 vcltzq_s32 (int32x4_t __a)
17353 int32x4_t __b = {0, 0, 0, 0};
17354 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
17357 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17358 vcltzq_s64 (int64x2_t __a)
17360 int64x2_t __b = {0, 0};
17361 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
17364 /* vcltz - scalar. */
17366 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17367 vcltzs_f32 (float32_t __a)
17369 return __a < 0.0f ? -1 : 0;
17372 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17373 vcltzd_s64 (int64x1_t __a)
17375 return __a < 0 ? -1ll : 0ll;
17378 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17379 vcltzd_u64 (int64x1_t __a)
17381 return __a < 0 ? -1ll : 0ll;
17384 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17385 vcltzd_f64 (float64_t __a)
17387 return __a < 0.0 ? -1ll : 0ll;
17390 /* vclz. */
17392 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17393 vclz_s8 (int8x8_t __a)
17395 return __builtin_aarch64_clzv8qi (__a);
17398 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17399 vclz_s16 (int16x4_t __a)
17401 return __builtin_aarch64_clzv4hi (__a);
17404 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17405 vclz_s32 (int32x2_t __a)
17407 return __builtin_aarch64_clzv2si (__a);
17410 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17411 vclz_u8 (uint8x8_t __a)
17413 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
17416 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17417 vclz_u16 (uint16x4_t __a)
17419 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
17422 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17423 vclz_u32 (uint32x2_t __a)
17425 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
17428 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17429 vclzq_s8 (int8x16_t __a)
17431 return __builtin_aarch64_clzv16qi (__a);
17434 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17435 vclzq_s16 (int16x8_t __a)
17437 return __builtin_aarch64_clzv8hi (__a);
17440 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17441 vclzq_s32 (int32x4_t __a)
17443 return __builtin_aarch64_clzv4si (__a);
17446 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17447 vclzq_u8 (uint8x16_t __a)
17449 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
17452 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17453 vclzq_u16 (uint16x8_t __a)
17455 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
17458 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17459 vclzq_u32 (uint32x4_t __a)
17461 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
17464 /* vcvt (double -> float). */
17466 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17467 vcvt_f32_f64 (float64x2_t __a)
17469 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
17472 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17473 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
17475 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
17478 /* vcvt (float -> double). */
17480 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17481 vcvt_f64_f32 (float32x2_t __a)
17484 return __builtin_aarch64_float_extend_lo_v2df (__a);
17487 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17488 vcvt_high_f64_f32 (float32x4_t __a)
17490 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
17493 /* vcvt (<u>int -> float) */
17495 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17496 vcvtd_f64_s64 (int64_t __a)
17498 return (float64_t) __a;
17501 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17502 vcvtd_f64_u64 (uint64_t __a)
17504 return (float64_t) __a;
17507 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17508 vcvts_f32_s32 (int32_t __a)
17510 return (float32_t) __a;
17513 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17514 vcvts_f32_u32 (uint32_t __a)
17516 return (float32_t) __a;
17519 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17520 vcvt_f32_s32 (int32x2_t __a)
17522 return __builtin_aarch64_floatv2siv2sf (__a);
17525 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17526 vcvt_f32_u32 (uint32x2_t __a)
17528 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
17531 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17532 vcvtq_f32_s32 (int32x4_t __a)
17534 return __builtin_aarch64_floatv4siv4sf (__a);
17537 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17538 vcvtq_f32_u32 (uint32x4_t __a)
17540 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
17543 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17544 vcvtq_f64_s64 (int64x2_t __a)
17546 return __builtin_aarch64_floatv2div2df (__a);
17549 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17550 vcvtq_f64_u64 (uint64x2_t __a)
17552 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
17555 /* vcvt (float -> <u>int) */
17557 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17558 vcvtd_s64_f64 (float64_t __a)
17560 return (int64_t) __a;
17563 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17564 vcvtd_u64_f64 (float64_t __a)
17566 return (uint64_t) __a;
17569 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17570 vcvts_s32_f32 (float32_t __a)
17572 return (int32_t) __a;
17575 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17576 vcvts_u32_f32 (float32_t __a)
17578 return (uint32_t) __a;
17581 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17582 vcvt_s32_f32 (float32x2_t __a)
17584 return __builtin_aarch64_lbtruncv2sfv2si (__a);
17587 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17588 vcvt_u32_f32 (float32x2_t __a)
17590 /* TODO: This cast should go away when builtins have
17591 their correct types. */
17592 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
17595 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17596 vcvtq_s32_f32 (float32x4_t __a)
17598 return __builtin_aarch64_lbtruncv4sfv4si (__a);
17601 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17602 vcvtq_u32_f32 (float32x4_t __a)
17604 /* TODO: This cast should go away when builtins have
17605 their correct types. */
17606 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
17609 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17610 vcvtq_s64_f64 (float64x2_t __a)
17612 return __builtin_aarch64_lbtruncv2dfv2di (__a);
17615 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17616 vcvtq_u64_f64 (float64x2_t __a)
17618 /* TODO: This cast should go away when builtins have
17619 their correct types. */
17620 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
17623 /* vcvta */
17625 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17626 vcvtad_s64_f64 (float64_t __a)
17628 return __builtin_aarch64_lrounddfdi (__a);
17631 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17632 vcvtad_u64_f64 (float64_t __a)
17634 return __builtin_aarch64_lroundudfdi (__a);
17637 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17638 vcvtas_s32_f32 (float32_t __a)
17640 return __builtin_aarch64_lroundsfsi (__a);
17643 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17644 vcvtas_u32_f32 (float32_t __a)
17646 return __builtin_aarch64_lroundusfsi (__a);
17649 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17650 vcvta_s32_f32 (float32x2_t __a)
17652 return __builtin_aarch64_lroundv2sfv2si (__a);
17655 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17656 vcvta_u32_f32 (float32x2_t __a)
17658 /* TODO: This cast should go away when builtins have
17659 their correct types. */
17660 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
17663 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17664 vcvtaq_s32_f32 (float32x4_t __a)
17666 return __builtin_aarch64_lroundv4sfv4si (__a);
17669 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17670 vcvtaq_u32_f32 (float32x4_t __a)
17672 /* TODO: This cast should go away when builtins have
17673 their correct types. */
17674 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
17677 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17678 vcvtaq_s64_f64 (float64x2_t __a)
17680 return __builtin_aarch64_lroundv2dfv2di (__a);
17683 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17684 vcvtaq_u64_f64 (float64x2_t __a)
17686 /* TODO: This cast should go away when builtins have
17687 their correct types. */
17688 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
17691 /* vcvtm */
17693 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17694 vcvtmd_s64_f64 (float64_t __a)
17696 return __builtin_llfloor (__a);
17699 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17700 vcvtmd_u64_f64 (float64_t __a)
17702 return __builtin_aarch64_lfloorudfdi (__a);
17705 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17706 vcvtms_s32_f32 (float32_t __a)
17708 return __builtin_ifloorf (__a);
17711 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17712 vcvtms_u32_f32 (float32_t __a)
17714 return __builtin_aarch64_lfloorusfsi (__a);
17717 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17718 vcvtm_s32_f32 (float32x2_t __a)
17720 return __builtin_aarch64_lfloorv2sfv2si (__a);
17723 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17724 vcvtm_u32_f32 (float32x2_t __a)
17726 /* TODO: This cast should go away when builtins have
17727 their correct types. */
17728 return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
17731 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17732 vcvtmq_s32_f32 (float32x4_t __a)
17734 return __builtin_aarch64_lfloorv4sfv4si (__a);
17737 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17738 vcvtmq_u32_f32 (float32x4_t __a)
17740 /* TODO: This cast should go away when builtins have
17741 their correct types. */
17742 return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
17745 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17746 vcvtmq_s64_f64 (float64x2_t __a)
17748 return __builtin_aarch64_lfloorv2dfv2di (__a);
17751 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17752 vcvtmq_u64_f64 (float64x2_t __a)
17754 /* TODO: This cast should go away when builtins have
17755 their correct types. */
17756 return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
17759 /* vcvtn */
17761 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17762 vcvtnd_s64_f64 (float64_t __a)
17764 return __builtin_aarch64_lfrintndfdi (__a);
17767 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17768 vcvtnd_u64_f64 (float64_t __a)
17770 return __builtin_aarch64_lfrintnudfdi (__a);
17773 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17774 vcvtns_s32_f32 (float32_t __a)
17776 return __builtin_aarch64_lfrintnsfsi (__a);
17779 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17780 vcvtns_u32_f32 (float32_t __a)
17782 return __builtin_aarch64_lfrintnusfsi (__a);
17785 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17786 vcvtn_s32_f32 (float32x2_t __a)
17788 return __builtin_aarch64_lfrintnv2sfv2si (__a);
17791 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17792 vcvtn_u32_f32 (float32x2_t __a)
17794 /* TODO: This cast should go away when builtins have
17795 their correct types. */
17796 return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
17799 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17800 vcvtnq_s32_f32 (float32x4_t __a)
17802 return __builtin_aarch64_lfrintnv4sfv4si (__a);
17805 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17806 vcvtnq_u32_f32 (float32x4_t __a)
17808 /* TODO: This cast should go away when builtins have
17809 their correct types. */
17810 return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
17813 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17814 vcvtnq_s64_f64 (float64x2_t __a)
17816 return __builtin_aarch64_lfrintnv2dfv2di (__a);
17819 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17820 vcvtnq_u64_f64 (float64x2_t __a)
17822 /* TODO: This cast should go away when builtins have
17823 their correct types. */
17824 return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
17827 /* vcvtp */
17829 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17830 vcvtpd_s64_f64 (float64_t __a)
17832 return __builtin_llceil (__a);
17835 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17836 vcvtpd_u64_f64 (float64_t __a)
17838 return __builtin_aarch64_lceiludfdi (__a);
17841 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17842 vcvtps_s32_f32 (float32_t __a)
17844 return __builtin_iceilf (__a);
17847 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17848 vcvtps_u32_f32 (float32_t __a)
17850 return __builtin_aarch64_lceilusfsi (__a);
17853 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17854 vcvtp_s32_f32 (float32x2_t __a)
17856 return __builtin_aarch64_lceilv2sfv2si (__a);
17859 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17860 vcvtp_u32_f32 (float32x2_t __a)
17862 /* TODO: This cast should go away when builtins have
17863 their correct types. */
17864 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
17867 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17868 vcvtpq_s32_f32 (float32x4_t __a)
17870 return __builtin_aarch64_lceilv4sfv4si (__a);
17873 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17874 vcvtpq_u32_f32 (float32x4_t __a)
17876 /* TODO: This cast should go away when builtins have
17877 their correct types. */
17878 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
17881 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17882 vcvtpq_s64_f64 (float64x2_t __a)
17884 return __builtin_aarch64_lceilv2dfv2di (__a);
17887 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17888 vcvtpq_u64_f64 (float64x2_t __a)
17890 /* TODO: This cast should go away when builtins have
17891 their correct types. */
17892 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
17895 /* vdup_n */
17897 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17898 vdup_n_f32 (float32_t __a)
17900 return (float32x2_t) {__a, __a};
17903 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17904 vdup_n_f64 (float64_t __a)
17906 return __a;
17909 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17910 vdup_n_p8 (poly8_t __a)
17912 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17915 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17916 vdup_n_p16 (poly16_t __a)
17918 return (poly16x4_t) {__a, __a, __a, __a};
17921 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17922 vdup_n_s8 (int8_t __a)
17924 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17927 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17928 vdup_n_s16 (int16_t __a)
17930 return (int16x4_t) {__a, __a, __a, __a};
17933 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17934 vdup_n_s32 (int32_t __a)
17936 return (int32x2_t) {__a, __a};
17939 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
17940 vdup_n_s64 (int64_t __a)
17942 return __a;
17945 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17946 vdup_n_u8 (uint8_t __a)
17948 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17951 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17952 vdup_n_u16 (uint16_t __a)
17954 return (uint16x4_t) {__a, __a, __a, __a};
17957 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17958 vdup_n_u32 (uint32_t __a)
17960 return (uint32x2_t) {__a, __a};
17963 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17964 vdup_n_u64 (uint64_t __a)
17966 return __a;
17969 /* vdupq_n */
17971 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17972 vdupq_n_f32 (float32_t __a)
17974 return (float32x4_t) {__a, __a, __a, __a};
17977 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17978 vdupq_n_f64 (float64_t __a)
17980 return (float64x2_t) {__a, __a};
17983 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17984 vdupq_n_p8 (uint32_t __a)
17986 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
17987 __a, __a, __a, __a, __a, __a, __a, __a};
17990 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17991 vdupq_n_p16 (uint32_t __a)
17993 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17996 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17997 vdupq_n_s8 (int32_t __a)
17999 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
18000 __a, __a, __a, __a, __a, __a, __a, __a};
18003 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18004 vdupq_n_s16 (int32_t __a)
18006 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18009 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18010 vdupq_n_s32 (int32_t __a)
18012 return (int32x4_t) {__a, __a, __a, __a};
18015 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18016 vdupq_n_s64 (int64_t __a)
18018 return (int64x2_t) {__a, __a};
18021 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18022 vdupq_n_u8 (uint32_t __a)
18024 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
18025 __a, __a, __a, __a, __a, __a, __a, __a};
18028 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18029 vdupq_n_u16 (uint32_t __a)
18031 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18034 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18035 vdupq_n_u32 (uint32_t __a)
18037 return (uint32x4_t) {__a, __a, __a, __a};
18040 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18041 vdupq_n_u64 (uint64_t __a)
18043 return (uint64x2_t) {__a, __a};
18046 /* vdup_lane */
18048 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18049 vdup_lane_f32 (float32x2_t __a, const int __b)
18051 return __aarch64_vdup_lane_f32 (__a, __b);
18054 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18055 vdup_lane_f64 (float64x1_t __a, const int __b)
18057 return __aarch64_vdup_lane_f64 (__a, __b);
18060 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18061 vdup_lane_p8 (poly8x8_t __a, const int __b)
18063 return __aarch64_vdup_lane_p8 (__a, __b);
18066 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18067 vdup_lane_p16 (poly16x4_t __a, const int __b)
18069 return __aarch64_vdup_lane_p16 (__a, __b);
18072 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18073 vdup_lane_s8 (int8x8_t __a, const int __b)
18075 return __aarch64_vdup_lane_s8 (__a, __b);
18078 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18079 vdup_lane_s16 (int16x4_t __a, const int __b)
18081 return __aarch64_vdup_lane_s16 (__a, __b);
18084 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18085 vdup_lane_s32 (int32x2_t __a, const int __b)
18087 return __aarch64_vdup_lane_s32 (__a, __b);
18090 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18091 vdup_lane_s64 (int64x1_t __a, const int __b)
18093 return __aarch64_vdup_lane_s64 (__a, __b);
18096 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18097 vdup_lane_u8 (uint8x8_t __a, const int __b)
18099 return __aarch64_vdup_lane_u8 (__a, __b);
18102 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18103 vdup_lane_u16 (uint16x4_t __a, const int __b)
18105 return __aarch64_vdup_lane_u16 (__a, __b);
18108 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18109 vdup_lane_u32 (uint32x2_t __a, const int __b)
18111 return __aarch64_vdup_lane_u32 (__a, __b);
18114 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18115 vdup_lane_u64 (uint64x1_t __a, const int __b)
18117 return __aarch64_vdup_lane_u64 (__a, __b);
18120 /* vdup_laneq */
18122 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18123 vdup_laneq_f32 (float32x4_t __a, const int __b)
18125 return __aarch64_vdup_laneq_f32 (__a, __b);
18128 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18129 vdup_laneq_f64 (float64x2_t __a, const int __b)
18131 return __aarch64_vdup_laneq_f64 (__a, __b);
18134 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18135 vdup_laneq_p8 (poly8x16_t __a, const int __b)
18137 return __aarch64_vdup_laneq_p8 (__a, __b);
18140 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18141 vdup_laneq_p16 (poly16x8_t __a, const int __b)
18143 return __aarch64_vdup_laneq_p16 (__a, __b);
18146 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18147 vdup_laneq_s8 (int8x16_t __a, const int __b)
18149 return __aarch64_vdup_laneq_s8 (__a, __b);
18152 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18153 vdup_laneq_s16 (int16x8_t __a, const int __b)
18155 return __aarch64_vdup_laneq_s16 (__a, __b);
18158 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18159 vdup_laneq_s32 (int32x4_t __a, const int __b)
18161 return __aarch64_vdup_laneq_s32 (__a, __b);
18164 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18165 vdup_laneq_s64 (int64x2_t __a, const int __b)
18167 return __aarch64_vdup_laneq_s64 (__a, __b);
18170 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18171 vdup_laneq_u8 (uint8x16_t __a, const int __b)
18173 return __aarch64_vdup_laneq_u8 (__a, __b);
18176 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18177 vdup_laneq_u16 (uint16x8_t __a, const int __b)
18179 return __aarch64_vdup_laneq_u16 (__a, __b);
18182 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18183 vdup_laneq_u32 (uint32x4_t __a, const int __b)
18185 return __aarch64_vdup_laneq_u32 (__a, __b);
18188 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18189 vdup_laneq_u64 (uint64x2_t __a, const int __b)
18191 return __aarch64_vdup_laneq_u64 (__a, __b);
18194 /* vdupq_lane */
18195 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18196 vdupq_lane_f32 (float32x2_t __a, const int __b)
18198 return __aarch64_vdupq_lane_f32 (__a, __b);
18201 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18202 vdupq_lane_f64 (float64x1_t __a, const int __b)
18204 return __aarch64_vdupq_lane_f64 (__a, __b);
18207 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18208 vdupq_lane_p8 (poly8x8_t __a, const int __b)
18210 return __aarch64_vdupq_lane_p8 (__a, __b);
18213 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18214 vdupq_lane_p16 (poly16x4_t __a, const int __b)
18216 return __aarch64_vdupq_lane_p16 (__a, __b);
18219 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18220 vdupq_lane_s8 (int8x8_t __a, const int __b)
18222 return __aarch64_vdupq_lane_s8 (__a, __b);
18225 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18226 vdupq_lane_s16 (int16x4_t __a, const int __b)
18228 return __aarch64_vdupq_lane_s16 (__a, __b);
18231 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18232 vdupq_lane_s32 (int32x2_t __a, const int __b)
18234 return __aarch64_vdupq_lane_s32 (__a, __b);
18237 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18238 vdupq_lane_s64 (int64x1_t __a, const int __b)
18240 return __aarch64_vdupq_lane_s64 (__a, __b);
18243 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18244 vdupq_lane_u8 (uint8x8_t __a, const int __b)
18246 return __aarch64_vdupq_lane_u8 (__a, __b);
18249 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18250 vdupq_lane_u16 (uint16x4_t __a, const int __b)
18252 return __aarch64_vdupq_lane_u16 (__a, __b);
18255 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18256 vdupq_lane_u32 (uint32x2_t __a, const int __b)
18258 return __aarch64_vdupq_lane_u32 (__a, __b);
18261 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18262 vdupq_lane_u64 (uint64x1_t __a, const int __b)
18264 return __aarch64_vdupq_lane_u64 (__a, __b);
18267 /* vdupq_laneq */
18268 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18269 vdupq_laneq_f32 (float32x4_t __a, const int __b)
18271 return __aarch64_vdupq_laneq_f32 (__a, __b);
18274 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18275 vdupq_laneq_f64 (float64x2_t __a, const int __b)
18277 return __aarch64_vdupq_laneq_f64 (__a, __b);
18280 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18281 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
18283 return __aarch64_vdupq_laneq_p8 (__a, __b);
18286 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18287 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
18289 return __aarch64_vdupq_laneq_p16 (__a, __b);
18292 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18293 vdupq_laneq_s8 (int8x16_t __a, const int __b)
18295 return __aarch64_vdupq_laneq_s8 (__a, __b);
18298 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18299 vdupq_laneq_s16 (int16x8_t __a, const int __b)
18301 return __aarch64_vdupq_laneq_s16 (__a, __b);
18304 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18305 vdupq_laneq_s32 (int32x4_t __a, const int __b)
18307 return __aarch64_vdupq_laneq_s32 (__a, __b);
18310 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18311 vdupq_laneq_s64 (int64x2_t __a, const int __b)
18313 return __aarch64_vdupq_laneq_s64 (__a, __b);
18316 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18317 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
18319 return __aarch64_vdupq_laneq_u8 (__a, __b);
18322 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18323 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
18325 return __aarch64_vdupq_laneq_u16 (__a, __b);
18328 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18329 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
18331 return __aarch64_vdupq_laneq_u32 (__a, __b);
18334 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18335 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
18337 return __aarch64_vdupq_laneq_u64 (__a, __b);
18340 /* vdupb_lane */
18341 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
18342 vdupb_lane_p8 (poly8x8_t __a, const int __b)
18344 return __aarch64_vget_lane_p8 (__a, __b);
18347 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18348 vdupb_lane_s8 (int8x8_t __a, const int __b)
18350 return __aarch64_vget_lane_s8 (__a, __b);
18353 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18354 vdupb_lane_u8 (uint8x8_t __a, const int __b)
18356 return __aarch64_vget_lane_u8 (__a, __b);
18359 /* vduph_lane */
18360 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
18361 vduph_lane_p16 (poly16x4_t __a, const int __b)
18363 return __aarch64_vget_lane_p16 (__a, __b);
18366 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18367 vduph_lane_s16 (int16x4_t __a, const int __b)
18369 return __aarch64_vget_lane_s16 (__a, __b);
18372 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18373 vduph_lane_u16 (uint16x4_t __a, const int __b)
18375 return __aarch64_vget_lane_u16 (__a, __b);
18378 /* vdups_lane */
18379 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18380 vdups_lane_f32 (float32x2_t __a, const int __b)
18382 return __aarch64_vget_lane_f32 (__a, __b);
18385 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18386 vdups_lane_s32 (int32x2_t __a, const int __b)
18388 return __aarch64_vget_lane_s32 (__a, __b);
18391 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18392 vdups_lane_u32 (uint32x2_t __a, const int __b)
18394 return __aarch64_vget_lane_u32 (__a, __b);
18397 /* vdupd_lane */
18398 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18399 vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b)
18401 return __a;
18404 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18405 vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b)
18407 return __a;
18410 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18411 vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b)
18413 return __a;
18416 /* vdupb_laneq */
18417 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
18418 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
18420 return __aarch64_vgetq_lane_p8 (__a, __b);
18423 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18424 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
18426 return __aarch64_vgetq_lane_s8 (__a, __b);
18429 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18430 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
18432 return __aarch64_vgetq_lane_u8 (__a, __b);
18435 /* vduph_laneq */
18436 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
18437 vduph_laneq_p16 (poly16x8_t __a, const int __b)
18439 return __aarch64_vgetq_lane_p16 (__a, __b);
18442 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18443 vduph_laneq_s16 (int16x8_t __a, const int __b)
18445 return __aarch64_vgetq_lane_s16 (__a, __b);
18448 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18449 vduph_laneq_u16 (uint16x8_t __a, const int __b)
18451 return __aarch64_vgetq_lane_u16 (__a, __b);
18454 /* vdups_laneq */
18455 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18456 vdups_laneq_f32 (float32x4_t __a, const int __b)
18458 return __aarch64_vgetq_lane_f32 (__a, __b);
18461 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18462 vdups_laneq_s32 (int32x4_t __a, const int __b)
18464 return __aarch64_vgetq_lane_s32 (__a, __b);
18467 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18468 vdups_laneq_u32 (uint32x4_t __a, const int __b)
18470 return __aarch64_vgetq_lane_u32 (__a, __b);
18473 /* vdupd_laneq */
18474 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18475 vdupd_laneq_f64 (float64x2_t __a, const int __b)
18477 return __aarch64_vgetq_lane_f64 (__a, __b);
18480 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18481 vdupd_laneq_s64 (int64x2_t __a, const int __b)
18483 return __aarch64_vgetq_lane_s64 (__a, __b);
18486 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18487 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
18489 return __aarch64_vgetq_lane_u64 (__a, __b);
18492 /* vfma_lane */
18494 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18495 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
18496 float32x2_t __c, const int __lane)
18498 return __builtin_aarch64_fmav2sf (__b,
18499 __aarch64_vdup_lane_f32 (__c, __lane),
18500 __a);
18503 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18504 vfma_lane_f64 (float64_t __a, float64_t __b,
18505 float64_t __c, const int __lane)
18507 return __builtin_fma (__b, __c, __a);
18510 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18511 vfmad_lane_f64 (float64_t __a, float64_t __b,
18512 float64_t __c, const int __lane)
18514 return __builtin_fma (__b, __c, __a);
18517 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18518 vfmas_lane_f32 (float32_t __a, float32_t __b,
18519 float32x2_t __c, const int __lane)
18521 return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
18524 /* vfma_laneq */
18526 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18527 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
18528 float32x4_t __c, const int __lane)
18530 return __builtin_aarch64_fmav2sf (__b,
18531 __aarch64_vdup_laneq_f32 (__c, __lane),
18532 __a);
18535 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18536 vfma_laneq_f64 (float64_t __a, float64_t __b,
18537 float64x2_t __c, const int __lane)
18539 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18542 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18543 vfmad_laneq_f64 (float64_t __a, float64_t __b,
18544 float64x2_t __c, const int __lane)
18546 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18549 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18550 vfmas_laneq_f32 (float32_t __a, float32_t __b,
18551 float32x4_t __c, const int __lane)
18553 return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
18556 /* vfmaq_lane */
18558 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18559 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18560 float32x2_t __c, const int __lane)
18562 return __builtin_aarch64_fmav4sf (__b,
18563 __aarch64_vdupq_lane_f32 (__c, __lane),
18564 __a);
18567 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18568 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
18569 float64_t __c, const int __lane)
18571 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
18574 /* vfmaq_laneq */
18576 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18577 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18578 float32x4_t __c, const int __lane)
18580 return __builtin_aarch64_fmav4sf (__b,
18581 __aarch64_vdupq_laneq_f32 (__c, __lane),
18582 __a);
18585 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18586 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
18587 float64x2_t __c, const int __lane)
18589 return __builtin_aarch64_fmav2df (__b,
18590 __aarch64_vdupq_laneq_f64 (__c, __lane),
18591 __a);
18594 /* vfms_lane */
18596 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18597 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
18598 float32x2_t __c, const int __lane)
18600 return __builtin_aarch64_fmav2sf (-__b,
18601 __aarch64_vdup_lane_f32 (__c, __lane),
18602 __a);
18605 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18606 vfms_lane_f64 (float64_t __a, float64_t __b,
18607 float64_t __c, const int __lane)
18609 return __builtin_fma (-__b, __c, __a);
18612 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18613 vfmsd_lane_f64 (float64_t __a, float64_t __b,
18614 float64_t __c, const int __lane)
18616 return __builtin_fma (-__b, __c, __a);
18619 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18620 vfmss_lane_f32 (float32_t __a, float32_t __b,
18621 float32x2_t __c, const int __lane)
18623 return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
18626 /* vfms_laneq */
18628 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18629 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
18630 float32x4_t __c, const int __lane)
18632 return __builtin_aarch64_fmav2sf (-__b,
18633 __aarch64_vdup_laneq_f32 (__c, __lane),
18634 __a);
18637 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18638 vfms_laneq_f64 (float64_t __a, float64_t __b,
18639 float64x2_t __c, const int __lane)
18641 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18644 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18645 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
18646 float64x2_t __c, const int __lane)
18648 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18651 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18652 vfmss_laneq_f32 (float32_t __a, float32_t __b,
18653 float32x4_t __c, const int __lane)
18655 return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
18658 /* vfmsq_lane */
18660 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18661 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18662 float32x2_t __c, const int __lane)
18664 return __builtin_aarch64_fmav4sf (-__b,
18665 __aarch64_vdupq_lane_f32 (__c, __lane),
18666 __a);
18669 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18670 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
18671 float64_t __c, const int __lane)
18673 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
18676 /* vfmsq_laneq */
18678 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18679 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18680 float32x4_t __c, const int __lane)
18682 return __builtin_aarch64_fmav4sf (-__b,
18683 __aarch64_vdupq_laneq_f32 (__c, __lane),
18684 __a);
18687 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18688 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
18689 float64x2_t __c, const int __lane)
18691 return __builtin_aarch64_fmav2df (-__b,
18692 __aarch64_vdupq_laneq_f64 (__c, __lane),
18693 __a);
18696 /* vld1 */
18698 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18699 vld1_f32 (const float32_t *a)
18701 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
18704 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18705 vld1_f64 (const float64_t *a)
18707 return *a;
18710 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18711 vld1_p8 (const poly8_t *a)
18713 return (poly8x8_t)
18714 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18717 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18718 vld1_p16 (const poly16_t *a)
18720 return (poly16x4_t)
18721 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18724 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18725 vld1_s8 (const int8_t *a)
18727 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18730 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18731 vld1_s16 (const int16_t *a)
18733 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18736 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18737 vld1_s32 (const int32_t *a)
18739 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
18742 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18743 vld1_s64 (const int64_t *a)
18745 return *a;
18748 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18749 vld1_u8 (const uint8_t *a)
18751 return (uint8x8_t)
18752 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18755 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18756 vld1_u16 (const uint16_t *a)
18758 return (uint16x4_t)
18759 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18762 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18763 vld1_u32 (const uint32_t *a)
18765 return (uint32x2_t)
18766 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
18769 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18770 vld1_u64 (const uint64_t *a)
18772 return *a;
18775 /* vld1q */
18777 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18778 vld1q_f32 (const float32_t *a)
18780 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
18783 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18784 vld1q_f64 (const float64_t *a)
18786 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
18789 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18790 vld1q_p8 (const poly8_t *a)
18792 return (poly8x16_t)
18793 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18796 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18797 vld1q_p16 (const poly16_t *a)
18799 return (poly16x8_t)
18800 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18803 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18804 vld1q_s8 (const int8_t *a)
18806 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18809 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18810 vld1q_s16 (const int16_t *a)
18812 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18815 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18816 vld1q_s32 (const int32_t *a)
18818 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
18821 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18822 vld1q_s64 (const int64_t *a)
18824 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
18827 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18828 vld1q_u8 (const uint8_t *a)
18830 return (uint8x16_t)
18831 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18835 vld1q_u16 (const uint16_t *a)
18837 return (uint16x8_t)
18838 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18841 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18842 vld1q_u32 (const uint32_t *a)
18844 return (uint32x4_t)
18845 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
18848 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18849 vld1q_u64 (const uint64_t *a)
18851 return (uint64x2_t)
18852 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
18855 /* vldn */
18857 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
18858 vld2_s64 (const int64_t * __a)
18860 int64x1x2_t ret;
18861 __builtin_aarch64_simd_oi __o;
18862 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
18863 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
18864 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
18865 return ret;
18868 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
18869 vld2_u64 (const uint64_t * __a)
18871 uint64x1x2_t ret;
18872 __builtin_aarch64_simd_oi __o;
18873 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
18874 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
18875 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
18876 return ret;
18879 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
18880 vld2_f64 (const float64_t * __a)
18882 float64x1x2_t ret;
18883 __builtin_aarch64_simd_oi __o;
18884 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
18885 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
18886 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
18887 return ret;
18890 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
18891 vld2_s8 (const int8_t * __a)
18893 int8x8x2_t ret;
18894 __builtin_aarch64_simd_oi __o;
18895 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
18896 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
18897 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
18898 return ret;
18901 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
18902 vld2_p8 (const poly8_t * __a)
18904 poly8x8x2_t ret;
18905 __builtin_aarch64_simd_oi __o;
18906 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
18907 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
18908 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
18909 return ret;
18912 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
18913 vld2_s16 (const int16_t * __a)
18915 int16x4x2_t ret;
18916 __builtin_aarch64_simd_oi __o;
18917 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
18918 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
18919 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
18920 return ret;
18923 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
18924 vld2_p16 (const poly16_t * __a)
18926 poly16x4x2_t ret;
18927 __builtin_aarch64_simd_oi __o;
18928 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
18929 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
18930 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
18931 return ret;
18934 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
18935 vld2_s32 (const int32_t * __a)
18937 int32x2x2_t ret;
18938 __builtin_aarch64_simd_oi __o;
18939 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
18940 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
18941 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
18942 return ret;
18945 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
18946 vld2_u8 (const uint8_t * __a)
18948 uint8x8x2_t ret;
18949 __builtin_aarch64_simd_oi __o;
18950 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
18951 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
18952 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
18953 return ret;
18956 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
18957 vld2_u16 (const uint16_t * __a)
18959 uint16x4x2_t ret;
18960 __builtin_aarch64_simd_oi __o;
18961 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
18962 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
18963 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
18964 return ret;
18967 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
18968 vld2_u32 (const uint32_t * __a)
18970 uint32x2x2_t ret;
18971 __builtin_aarch64_simd_oi __o;
18972 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
18973 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
18974 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
18975 return ret;
18978 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
18979 vld2_f32 (const float32_t * __a)
18981 float32x2x2_t ret;
18982 __builtin_aarch64_simd_oi __o;
18983 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
18984 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
18985 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
18986 return ret;
18989 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
18990 vld2q_s8 (const int8_t * __a)
18992 int8x16x2_t ret;
18993 __builtin_aarch64_simd_oi __o;
18994 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
18995 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
18996 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
18997 return ret;
19000 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
19001 vld2q_p8 (const poly8_t * __a)
19003 poly8x16x2_t ret;
19004 __builtin_aarch64_simd_oi __o;
19005 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19006 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19007 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19008 return ret;
19011 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
19012 vld2q_s16 (const int16_t * __a)
19014 int16x8x2_t ret;
19015 __builtin_aarch64_simd_oi __o;
19016 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19017 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19018 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19019 return ret;
19022 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
19023 vld2q_p16 (const poly16_t * __a)
19025 poly16x8x2_t ret;
19026 __builtin_aarch64_simd_oi __o;
19027 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19028 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19029 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19030 return ret;
19033 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
19034 vld2q_s32 (const int32_t * __a)
19036 int32x4x2_t ret;
19037 __builtin_aarch64_simd_oi __o;
19038 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
19039 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
19040 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
19041 return ret;
19044 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
19045 vld2q_s64 (const int64_t * __a)
19047 int64x2x2_t ret;
19048 __builtin_aarch64_simd_oi __o;
19049 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
19050 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
19051 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
19052 return ret;
19055 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
19056 vld2q_u8 (const uint8_t * __a)
19058 uint8x16x2_t ret;
19059 __builtin_aarch64_simd_oi __o;
19060 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19061 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19062 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19063 return ret;
19066 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
19067 vld2q_u16 (const uint16_t * __a)
19069 uint16x8x2_t ret;
19070 __builtin_aarch64_simd_oi __o;
19071 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19072 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19073 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19074 return ret;
19077 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
19078 vld2q_u32 (const uint32_t * __a)
19080 uint32x4x2_t ret;
19081 __builtin_aarch64_simd_oi __o;
19082 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
19083 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
19084 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
19085 return ret;
19088 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
19089 vld2q_u64 (const uint64_t * __a)
19091 uint64x2x2_t ret;
19092 __builtin_aarch64_simd_oi __o;
19093 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
19094 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
19095 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
19096 return ret;
19099 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
19100 vld2q_f32 (const float32_t * __a)
19102 float32x4x2_t ret;
19103 __builtin_aarch64_simd_oi __o;
19104 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
19105 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
19106 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
19107 return ret;
19110 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
19111 vld2q_f64 (const float64_t * __a)
19113 float64x2x2_t ret;
19114 __builtin_aarch64_simd_oi __o;
19115 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
19116 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
19117 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
19118 return ret;
19121 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
19122 vld3_s64 (const int64_t * __a)
19124 int64x1x3_t ret;
19125 __builtin_aarch64_simd_ci __o;
19126 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
19127 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
19128 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
19129 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
19130 return ret;
19133 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
19134 vld3_u64 (const uint64_t * __a)
19136 uint64x1x3_t ret;
19137 __builtin_aarch64_simd_ci __o;
19138 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
19139 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
19140 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
19141 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
19142 return ret;
19145 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
19146 vld3_f64 (const float64_t * __a)
19148 float64x1x3_t ret;
19149 __builtin_aarch64_simd_ci __o;
19150 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
19151 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
19152 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
19153 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
19154 return ret;
19157 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
19158 vld3_s8 (const int8_t * __a)
19160 int8x8x3_t ret;
19161 __builtin_aarch64_simd_ci __o;
19162 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19163 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19164 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19165 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19166 return ret;
19169 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
19170 vld3_p8 (const poly8_t * __a)
19172 poly8x8x3_t ret;
19173 __builtin_aarch64_simd_ci __o;
19174 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19175 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19176 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19177 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19178 return ret;
19181 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
19182 vld3_s16 (const int16_t * __a)
19184 int16x4x3_t ret;
19185 __builtin_aarch64_simd_ci __o;
19186 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19187 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19188 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19189 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19190 return ret;
19193 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
19194 vld3_p16 (const poly16_t * __a)
19196 poly16x4x3_t ret;
19197 __builtin_aarch64_simd_ci __o;
19198 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19199 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19200 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19201 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19202 return ret;
19205 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
19206 vld3_s32 (const int32_t * __a)
19208 int32x2x3_t ret;
19209 __builtin_aarch64_simd_ci __o;
19210 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
19211 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
19212 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
19213 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
19214 return ret;
19217 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
19218 vld3_u8 (const uint8_t * __a)
19220 uint8x8x3_t ret;
19221 __builtin_aarch64_simd_ci __o;
19222 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19223 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19224 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19225 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19226 return ret;
19229 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
19230 vld3_u16 (const uint16_t * __a)
19232 uint16x4x3_t ret;
19233 __builtin_aarch64_simd_ci __o;
19234 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19235 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19236 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19237 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19238 return ret;
19241 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
19242 vld3_u32 (const uint32_t * __a)
19244 uint32x2x3_t ret;
19245 __builtin_aarch64_simd_ci __o;
19246 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
19247 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
19248 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
19249 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
19250 return ret;
19253 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
19254 vld3_f32 (const float32_t * __a)
19256 float32x2x3_t ret;
19257 __builtin_aarch64_simd_ci __o;
19258 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
19259 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
19260 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
19261 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
19262 return ret;
19265 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
19266 vld3q_s8 (const int8_t * __a)
19268 int8x16x3_t ret;
19269 __builtin_aarch64_simd_ci __o;
19270 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
19271 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
19272 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
19273 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
19274 return ret;
19277 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
19278 vld3q_p8 (const poly8_t * __a)
19280 poly8x16x3_t ret;
19281 __builtin_aarch64_simd_ci __o;
19282 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
19283 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
19284 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
19285 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
19286 return ret;
19289 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
19290 vld3q_s16 (const int16_t * __a)
19292 int16x8x3_t ret;
19293 __builtin_aarch64_simd_ci __o;
19294 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
19295 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
19296 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
19297 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
19298 return ret;
19301 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
19302 vld3q_p16 (const poly16_t * __a)
19304 poly16x8x3_t ret;
19305 __builtin_aarch64_simd_ci __o;
19306 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
19307 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
19308 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
19309 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
19310 return ret;
19313 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
19314 vld3q_s32 (const int32_t * __a)
19316 int32x4x3_t ret;
19317 __builtin_aarch64_simd_ci __o;
19318 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
19319 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
19320 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
19321 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
19322 return ret;
19325 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
19326 vld3q_s64 (const int64_t * __a)
19328 int64x2x3_t ret;
19329 __builtin_aarch64_simd_ci __o;
19330 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
19331 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
19332 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
19333 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
19334 return ret;
19337 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
19338 vld3q_u8 (const uint8_t * __a)
19340 uint8x16x3_t ret;
19341 __builtin_aarch64_simd_ci __o;
19342 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
19343 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
19344 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
19345 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
19346 return ret;
19349 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
19350 vld3q_u16 (const uint16_t * __a)
19352 uint16x8x3_t ret;
19353 __builtin_aarch64_simd_ci __o;
19354 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
19355 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
19356 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
19357 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
19358 return ret;
19361 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
19362 vld3q_u32 (const uint32_t * __a)
19364 uint32x4x3_t ret;
19365 __builtin_aarch64_simd_ci __o;
19366 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
19367 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
19368 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
19369 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
19370 return ret;
19373 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
19374 vld3q_u64 (const uint64_t * __a)
19376 uint64x2x3_t ret;
19377 __builtin_aarch64_simd_ci __o;
19378 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
19379 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
19380 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
19381 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
19382 return ret;
19385 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
19386 vld3q_f32 (const float32_t * __a)
19388 float32x4x3_t ret;
19389 __builtin_aarch64_simd_ci __o;
19390 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
19391 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
19392 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
19393 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
19394 return ret;
19397 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
19398 vld3q_f64 (const float64_t * __a)
19400 float64x2x3_t ret;
19401 __builtin_aarch64_simd_ci __o;
19402 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
19403 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
19404 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
19405 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
19406 return ret;
19409 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
19410 vld4_s64 (const int64_t * __a)
19412 int64x1x4_t ret;
19413 __builtin_aarch64_simd_xi __o;
19414 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
19415 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
19416 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
19417 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
19418 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
19419 return ret;
19422 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
19423 vld4_u64 (const uint64_t * __a)
19425 uint64x1x4_t ret;
19426 __builtin_aarch64_simd_xi __o;
19427 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
19428 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
19429 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
19430 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
19431 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
19432 return ret;
19435 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
19436 vld4_f64 (const float64_t * __a)
19438 float64x1x4_t ret;
19439 __builtin_aarch64_simd_xi __o;
19440 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
19441 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
19442 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
19443 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
19444 ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
19445 return ret;
19448 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
19449 vld4_s8 (const int8_t * __a)
19451 int8x8x4_t ret;
19452 __builtin_aarch64_simd_xi __o;
19453 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
19454 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
19455 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
19456 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
19457 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
19458 return ret;
19461 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
19462 vld4_p8 (const poly8_t * __a)
19464 poly8x8x4_t ret;
19465 __builtin_aarch64_simd_xi __o;
19466 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
19467 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
19468 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
19469 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
19470 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
19471 return ret;
19474 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
19475 vld4_s16 (const int16_t * __a)
19477 int16x4x4_t ret;
19478 __builtin_aarch64_simd_xi __o;
19479 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
19480 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
19481 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
19482 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
19483 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
19484 return ret;
19487 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
19488 vld4_p16 (const poly16_t * __a)
19490 poly16x4x4_t ret;
19491 __builtin_aarch64_simd_xi __o;
19492 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
19493 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
19494 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
19495 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
19496 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
19497 return ret;
19500 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
19501 vld4_s32 (const int32_t * __a)
19503 int32x2x4_t ret;
19504 __builtin_aarch64_simd_xi __o;
19505 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
19506 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
19507 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
19508 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
19509 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
19510 return ret;
19513 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
19514 vld4_u8 (const uint8_t * __a)
19516 uint8x8x4_t ret;
19517 __builtin_aarch64_simd_xi __o;
19518 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
19519 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
19520 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
19521 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
19522 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
19523 return ret;
19526 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
19527 vld4_u16 (const uint16_t * __a)
19529 uint16x4x4_t ret;
19530 __builtin_aarch64_simd_xi __o;
19531 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
19532 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
19533 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
19534 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
19535 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
19536 return ret;
19539 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
19540 vld4_u32 (const uint32_t * __a)
19542 uint32x2x4_t ret;
19543 __builtin_aarch64_simd_xi __o;
19544 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
19545 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
19546 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
19547 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
19548 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
19549 return ret;
19552 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
19553 vld4_f32 (const float32_t * __a)
19555 float32x2x4_t ret;
19556 __builtin_aarch64_simd_xi __o;
19557 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
19558 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
19559 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
19560 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
19561 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
19562 return ret;
19565 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
19566 vld4q_s8 (const int8_t * __a)
19568 int8x16x4_t ret;
19569 __builtin_aarch64_simd_xi __o;
19570 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
19571 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
19572 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
19573 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
19574 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
19575 return ret;
19578 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
19579 vld4q_p8 (const poly8_t * __a)
19581 poly8x16x4_t ret;
19582 __builtin_aarch64_simd_xi __o;
19583 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
19584 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
19585 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
19586 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
19587 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
19588 return ret;
19591 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
19592 vld4q_s16 (const int16_t * __a)
19594 int16x8x4_t ret;
19595 __builtin_aarch64_simd_xi __o;
19596 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
19597 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
19598 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
19599 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
19600 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
19601 return ret;
19604 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
19605 vld4q_p16 (const poly16_t * __a)
19607 poly16x8x4_t ret;
19608 __builtin_aarch64_simd_xi __o;
19609 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
19610 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
19611 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
19612 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
19613 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
19614 return ret;
19617 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
19618 vld4q_s32 (const int32_t * __a)
19620 int32x4x4_t ret;
19621 __builtin_aarch64_simd_xi __o;
19622 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
19623 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
19624 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
19625 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
19626 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
19627 return ret;
19630 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
19631 vld4q_s64 (const int64_t * __a)
19633 int64x2x4_t ret;
19634 __builtin_aarch64_simd_xi __o;
19635 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
19636 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
19637 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
19638 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
19639 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
19640 return ret;
19643 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
19644 vld4q_u8 (const uint8_t * __a)
19646 uint8x16x4_t ret;
19647 __builtin_aarch64_simd_xi __o;
19648 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
19649 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
19650 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
19651 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
19652 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
19653 return ret;
19656 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
19657 vld4q_u16 (const uint16_t * __a)
19659 uint16x8x4_t ret;
19660 __builtin_aarch64_simd_xi __o;
19661 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
19662 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
19663 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
19664 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
19665 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
19666 return ret;
19669 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
19670 vld4q_u32 (const uint32_t * __a)
19672 uint32x4x4_t ret;
19673 __builtin_aarch64_simd_xi __o;
19674 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
19675 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
19676 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
19677 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
19678 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
19679 return ret;
19682 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
19683 vld4q_u64 (const uint64_t * __a)
19685 uint64x2x4_t ret;
19686 __builtin_aarch64_simd_xi __o;
19687 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
19688 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
19689 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
19690 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
19691 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
19692 return ret;
19695 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
19696 vld4q_f32 (const float32_t * __a)
19698 float32x4x4_t ret;
19699 __builtin_aarch64_simd_xi __o;
19700 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
19701 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
19702 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
19703 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
19704 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
19705 return ret;
19708 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
19709 vld4q_f64 (const float64_t * __a)
19711 float64x2x4_t ret;
19712 __builtin_aarch64_simd_xi __o;
19713 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
19714 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
19715 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
19716 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
19717 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
19718 return ret;
19721 /* vmax */
19723 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19724 vmax_f32 (float32x2_t __a, float32x2_t __b)
19726 return __builtin_aarch64_smax_nanv2sf (__a, __b);
19729 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19730 vmax_s8 (int8x8_t __a, int8x8_t __b)
19732 return __builtin_aarch64_smaxv8qi (__a, __b);
19735 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19736 vmax_s16 (int16x4_t __a, int16x4_t __b)
19738 return __builtin_aarch64_smaxv4hi (__a, __b);
19741 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19742 vmax_s32 (int32x2_t __a, int32x2_t __b)
19744 return __builtin_aarch64_smaxv2si (__a, __b);
19747 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19748 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
19750 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
19751 (int8x8_t) __b);
19754 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19755 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
19757 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
19758 (int16x4_t) __b);
19761 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19762 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
19764 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
19765 (int32x2_t) __b);
19768 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19769 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
19771 return __builtin_aarch64_smax_nanv4sf (__a, __b);
19774 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19775 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
19777 return __builtin_aarch64_smax_nanv2df (__a, __b);
19780 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19781 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
19783 return __builtin_aarch64_smaxv16qi (__a, __b);
19786 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19787 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
19789 return __builtin_aarch64_smaxv8hi (__a, __b);
19792 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19793 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
19795 return __builtin_aarch64_smaxv4si (__a, __b);
19798 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19799 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
19801 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
19802 (int8x16_t) __b);
19805 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19806 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
19808 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
19809 (int16x8_t) __b);
19812 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19813 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
19815 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
19816 (int32x4_t) __b);
19819 /* vmaxnm */
19821 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19822 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
19824 return __builtin_aarch64_smaxv2sf (__a, __b);
19827 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19828 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
19830 return __builtin_aarch64_smaxv4sf (__a, __b);
19833 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19834 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
19836 return __builtin_aarch64_smaxv2df (__a, __b);
19839 /* vmaxv */
19841 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19842 vmaxv_f32 (float32x2_t __a)
19844 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
19848 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19849 vmaxv_s8 (int8x8_t __a)
19851 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
19854 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19855 vmaxv_s16 (int16x4_t __a)
19857 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
19860 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19861 vmaxv_s32 (int32x2_t __a)
19863 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
19866 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19867 vmaxv_u8 (uint8x8_t __a)
19869 return vget_lane_u8 ((uint8x8_t)
19870 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
19874 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19875 vmaxv_u16 (uint16x4_t __a)
19877 return vget_lane_u16 ((uint16x4_t)
19878 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
19882 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19883 vmaxv_u32 (uint32x2_t __a)
19885 return vget_lane_u32 ((uint32x2_t)
19886 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
19890 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19891 vmaxvq_f32 (float32x4_t __a)
19893 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
19897 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19898 vmaxvq_f64 (float64x2_t __a)
19900 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
19904 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19905 vmaxvq_s8 (int8x16_t __a)
19907 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
19910 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19911 vmaxvq_s16 (int16x8_t __a)
19913 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
19916 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19917 vmaxvq_s32 (int32x4_t __a)
19919 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
19922 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19923 vmaxvq_u8 (uint8x16_t __a)
19925 return vgetq_lane_u8 ((uint8x16_t)
19926 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
19930 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19931 vmaxvq_u16 (uint16x8_t __a)
19933 return vgetq_lane_u16 ((uint16x8_t)
19934 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
19938 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19939 vmaxvq_u32 (uint32x4_t __a)
19941 return vgetq_lane_u32 ((uint32x4_t)
19942 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
19946 /* vmaxnmv */
19948 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19949 vmaxnmv_f32 (float32x2_t __a)
19951 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
19955 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19956 vmaxnmvq_f32 (float32x4_t __a)
19958 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
19961 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19962 vmaxnmvq_f64 (float64x2_t __a)
19964 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
19967 /* vmin */
19969 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19970 vmin_f32 (float32x2_t __a, float32x2_t __b)
19972 return __builtin_aarch64_smin_nanv2sf (__a, __b);
19975 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19976 vmin_s8 (int8x8_t __a, int8x8_t __b)
19978 return __builtin_aarch64_sminv8qi (__a, __b);
19981 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19982 vmin_s16 (int16x4_t __a, int16x4_t __b)
19984 return __builtin_aarch64_sminv4hi (__a, __b);
19987 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19988 vmin_s32 (int32x2_t __a, int32x2_t __b)
19990 return __builtin_aarch64_sminv2si (__a, __b);
19993 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19994 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
19996 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
19997 (int8x8_t) __b);
20000 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20001 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
20003 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
20004 (int16x4_t) __b);
20007 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20008 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
20010 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
20011 (int32x2_t) __b);
20014 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20015 vminq_f32 (float32x4_t __a, float32x4_t __b)
20017 return __builtin_aarch64_smin_nanv4sf (__a, __b);
20020 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20021 vminq_f64 (float64x2_t __a, float64x2_t __b)
20023 return __builtin_aarch64_smin_nanv2df (__a, __b);
20026 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20027 vminq_s8 (int8x16_t __a, int8x16_t __b)
20029 return __builtin_aarch64_sminv16qi (__a, __b);
20032 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20033 vminq_s16 (int16x8_t __a, int16x8_t __b)
20035 return __builtin_aarch64_sminv8hi (__a, __b);
20038 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20039 vminq_s32 (int32x4_t __a, int32x4_t __b)
20041 return __builtin_aarch64_sminv4si (__a, __b);
20044 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20045 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
20047 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
20048 (int8x16_t) __b);
20051 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20052 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
20054 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
20055 (int16x8_t) __b);
20058 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20059 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
20061 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
20062 (int32x4_t) __b);
20065 /* vminnm */
20067 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20068 vminnm_f32 (float32x2_t __a, float32x2_t __b)
20070 return __builtin_aarch64_sminv2sf (__a, __b);
20073 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20074 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
20076 return __builtin_aarch64_sminv4sf (__a, __b);
20079 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20080 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
20082 return __builtin_aarch64_sminv2df (__a, __b);
20085 /* vminv */
20087 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20088 vminv_f32 (float32x2_t __a)
20090 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
20094 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20095 vminv_s8 (int8x8_t __a)
20097 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
20101 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20102 vminv_s16 (int16x4_t __a)
20104 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
20107 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20108 vminv_s32 (int32x2_t __a)
20110 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
20113 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20114 vminv_u8 (uint8x8_t __a)
20116 return vget_lane_u8 ((uint8x8_t)
20117 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
20121 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20122 vminv_u16 (uint16x4_t __a)
20124 return vget_lane_u16 ((uint16x4_t)
20125 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
20129 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20130 vminv_u32 (uint32x2_t __a)
20132 return vget_lane_u32 ((uint32x2_t)
20133 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
20137 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20138 vminvq_f32 (float32x4_t __a)
20140 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
20144 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20145 vminvq_f64 (float64x2_t __a)
20147 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
20151 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20152 vminvq_s8 (int8x16_t __a)
20154 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
20157 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20158 vminvq_s16 (int16x8_t __a)
20160 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
20163 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20164 vminvq_s32 (int32x4_t __a)
20166 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
20169 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20170 vminvq_u8 (uint8x16_t __a)
20172 return vgetq_lane_u8 ((uint8x16_t)
20173 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
20177 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20178 vminvq_u16 (uint16x8_t __a)
20180 return vgetq_lane_u16 ((uint16x8_t)
20181 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
20185 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20186 vminvq_u32 (uint32x4_t __a)
20188 return vgetq_lane_u32 ((uint32x4_t)
20189 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
20193 /* vminnmv */
20195 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20196 vminnmv_f32 (float32x2_t __a)
20198 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
20201 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20202 vminnmvq_f32 (float32x4_t __a)
20204 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
20207 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20208 vminnmvq_f64 (float64x2_t __a)
20210 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
20213 /* vmla */
20215 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20216 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20218 return a + b * c;
20221 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20222 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20224 return a + b * c;
20227 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20228 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20230 return a + b * c;
20233 /* vmla_lane */
20235 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20236 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
20237 float32x2_t __c, const int __lane)
20239 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20242 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20243 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
20244 int16x4_t __c, const int __lane)
20246 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20249 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20250 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
20251 int32x2_t __c, const int __lane)
20253 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20256 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20257 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20258 uint16x4_t __c, const int __lane)
20260 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20263 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20264 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20265 uint32x2_t __c, const int __lane)
20267 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20270 /* vmla_laneq */
20272 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20273 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
20274 float32x4_t __c, const int __lane)
20276 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20279 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20280 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
20281 int16x8_t __c, const int __lane)
20283 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20286 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20287 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
20288 int32x4_t __c, const int __lane)
20290 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20293 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20294 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20295 uint16x8_t __c, const int __lane)
20297 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20300 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20301 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20302 uint32x4_t __c, const int __lane)
20304 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20307 /* vmlaq_lane */
20309 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20310 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
20311 float32x2_t __c, const int __lane)
20313 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20316 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20317 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
20318 int16x4_t __c, const int __lane)
20320 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20323 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20324 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
20325 int32x2_t __c, const int __lane)
20327 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20330 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20331 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20332 uint16x4_t __c, const int __lane)
20334 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20337 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20338 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20339 uint32x2_t __c, const int __lane)
20341 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20344 /* vmlaq_laneq */
20346 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20347 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20348 float32x4_t __c, const int __lane)
20350 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20353 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20354 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20355 int16x8_t __c, const int __lane)
20357 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20360 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20361 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20362 int32x4_t __c, const int __lane)
20364 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20367 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20368 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20369 uint16x8_t __c, const int __lane)
20371 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20374 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20375 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20376 uint32x4_t __c, const int __lane)
20378 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20381 /* vmls */
20383 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20384 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20386 return a - b * c;
20389 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20390 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20392 return a - b * c;
20395 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20396 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20398 return a - b * c;
20401 /* vmls_lane */
20403 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20404 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
20405 float32x2_t __c, const int __lane)
20407 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20410 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20411 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
20412 int16x4_t __c, const int __lane)
20414 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20417 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20418 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
20419 int32x2_t __c, const int __lane)
20421 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20424 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20425 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20426 uint16x4_t __c, const int __lane)
20428 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20431 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20432 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20433 uint32x2_t __c, const int __lane)
20435 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20438 /* vmls_laneq */
20440 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20441 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
20442 float32x4_t __c, const int __lane)
20444 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20447 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20448 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
20449 int16x8_t __c, const int __lane)
20451 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20454 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20455 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
20456 int32x4_t __c, const int __lane)
20458 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20461 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20462 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20463 uint16x8_t __c, const int __lane)
20465 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20468 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20469 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20470 uint32x4_t __c, const int __lane)
20472 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20475 /* vmlsq_lane */
20477 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20478 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
20479 float32x2_t __c, const int __lane)
20481 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20484 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20485 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
20486 int16x4_t __c, const int __lane)
20488 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20491 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20492 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
20493 int32x2_t __c, const int __lane)
20495 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20498 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20499 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20500 uint16x4_t __c, const int __lane)
20502 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20505 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20506 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20507 uint32x2_t __c, const int __lane)
20509 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20512 /* vmlsq_laneq */
20514 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20515 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20516 float32x4_t __c, const int __lane)
20518 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20521 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20522 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20523 int16x8_t __c, const int __lane)
20525 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20528 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20529 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20530 int32x4_t __c, const int __lane)
20532 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20534 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20535 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20536 uint16x8_t __c, const int __lane)
20538 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20541 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20542 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20543 uint32x4_t __c, const int __lane)
20545 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20548 /* vmov_n_ */
20550 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20551 vmov_n_f32 (float32_t __a)
20553 return vdup_n_f32 (__a);
/* vmov_n_f64: duplicate a scalar into all lanes of a float64x1_t.
   float64x1_t is typedef'd to plain double at the top of this file,
   so the single-lane "vector" is just the scalar itself.  */
20556 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20557 vmov_n_f64 (float64_t __a)
20559 return __a;
20562 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20563 vmov_n_p8 (poly8_t __a)
20565 return vdup_n_p8 (__a);
20568 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20569 vmov_n_p16 (poly16_t __a)
20571 return vdup_n_p16 (__a);
20574 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20575 vmov_n_s8 (int8_t __a)
20577 return vdup_n_s8 (__a);
20580 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20581 vmov_n_s16 (int16_t __a)
20583 return vdup_n_s16 (__a);
20586 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20587 vmov_n_s32 (int32_t __a)
20589 return vdup_n_s32 (__a);
20592 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20593 vmov_n_s64 (int64_t __a)
20595 return __a;
20598 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20599 vmov_n_u8 (uint8_t __a)
20601 return vdup_n_u8 (__a);
20604 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20605 vmov_n_u16 (uint16_t __a)
20607 return vdup_n_u16 (__a);
20610 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20611 vmov_n_u32 (uint32_t __a)
20613 return vdup_n_u32 (__a);
20616 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20617 vmov_n_u64 (uint64_t __a)
20619 return __a;
20622 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20623 vmovq_n_f32 (float32_t __a)
20625 return vdupq_n_f32 (__a);
20628 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20629 vmovq_n_f64 (float64_t __a)
20631 return vdupq_n_f64 (__a);
20634 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20635 vmovq_n_p8 (poly8_t __a)
20637 return vdupq_n_p8 (__a);
20640 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20641 vmovq_n_p16 (poly16_t __a)
20643 return vdupq_n_p16 (__a);
20646 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20647 vmovq_n_s8 (int8_t __a)
20649 return vdupq_n_s8 (__a);
20652 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20653 vmovq_n_s16 (int16_t __a)
20655 return vdupq_n_s16 (__a);
20658 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20659 vmovq_n_s32 (int32_t __a)
20661 return vdupq_n_s32 (__a);
20664 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20665 vmovq_n_s64 (int64_t __a)
20667 return vdupq_n_s64 (__a);
20670 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20671 vmovq_n_u8 (uint8_t __a)
20673 return vdupq_n_u8 (__a);
20676 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20677 vmovq_n_u16 (uint16_t __a)
20679 return vdupq_n_u16 (__a);
20682 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20683 vmovq_n_u32 (uint32_t __a)
20685 return vdupq_n_u32 (__a);
20688 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20689 vmovq_n_u64 (uint64_t __a)
20691 return vdupq_n_u64 (__a);
20694 /* vmul_lane */
20696 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20697 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
20699 return __a * __aarch64_vget_lane_f32 (__b, __lane);
/* vmul_lane_f64: multiply by a selected lane of __b.
   float64x1_t has exactly one lane (it is typedef'd to double at the
   top of this file), so __lane can only legally be 0 and is not used;
   the operation reduces to a plain scalar multiply.  */
20702 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20703 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
20705 return __a * __b;
20708 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20709 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
20711 return __a * __aarch64_vget_lane_s16 (__b, __lane);
20714 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20715 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
20717 return __a * __aarch64_vget_lane_s32 (__b, __lane);
20720 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20721 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
20723 return __a * __aarch64_vget_lane_u16 (__b, __lane);
20726 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20727 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
20729 return __a * __aarch64_vget_lane_u32 (__b, __lane);
20732 /* vmul_laneq */
20734 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20735 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
20737 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
20740 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20741 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
20743 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
20746 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20747 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
20749 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
20752 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20753 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
20755 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
20758 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20759 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
20761 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
20764 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20765 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
20767 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
20770 /* vmulq_lane */
20772 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20773 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
20775 return __a * __aarch64_vget_lane_f32 (__b, __lane);
/* vmulq_lane_f64: multiply each lane of __a by a selected lane of __b.
   float64x1_t has a single lane (typedef'd to double at the top of
   this file), so __lane can only legally be 0 and is not used.
   NOTE(review): this relies on GCC's vector extensions broadcasting
   the scalar __b across both lanes of __a in `__a * __b` -- confirm
   against the GCC vector-extension semantics.  */
20778 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20779 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
20781 return __a * __b;
20784 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20785 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
20787 return __a * __aarch64_vget_lane_s16 (__b, __lane);
20790 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20791 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
20793 return __a * __aarch64_vget_lane_s32 (__b, __lane);
20796 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20797 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
20799 return __a * __aarch64_vget_lane_u16 (__b, __lane);
20802 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20803 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
20805 return __a * __aarch64_vget_lane_u32 (__b, __lane);
20808 /* vmulq_laneq */
20810 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20811 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
20813 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
20816 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20817 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
20819 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
20822 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20823 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
20825 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
20828 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20829 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
20831 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
20834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20835 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
20837 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
20840 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20841 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
20843 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
20846 /* vneg */
20848 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20849 vneg_f32 (float32x2_t __a)
20851 return -__a;
20854 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20855 vneg_f64 (float64x1_t __a)
20857 return -__a;
20860 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20861 vneg_s8 (int8x8_t __a)
20863 return -__a;
20866 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20867 vneg_s16 (int16x4_t __a)
20869 return -__a;
20872 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20873 vneg_s32 (int32x2_t __a)
20875 return -__a;
20878 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20879 vneg_s64 (int64x1_t __a)
20881 return -__a;
20884 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20885 vnegq_f32 (float32x4_t __a)
20887 return -__a;
20890 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20891 vnegq_f64 (float64x2_t __a)
20893 return -__a;
20896 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20897 vnegq_s8 (int8x16_t __a)
20899 return -__a;
20902 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20903 vnegq_s16 (int16x8_t __a)
20905 return -__a;
20908 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20909 vnegq_s32 (int32x4_t __a)
20911 return -__a;
20914 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20915 vnegq_s64 (int64x2_t __a)
20917 return -__a;
20920 /* vqabs */
20922 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20923 vqabsq_s64 (int64x2_t __a)
20925 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
/* vqabsb_s8: scalar saturating absolute value of a signed byte
   (SQABS on the B register form).
   NOTE(review): ACLE specifies this intrinsic in terms of plain
   int8_t; the int8x1_t type used here is a GCC-local typedef of
   int8_t (see the top of this file), so behaviour matches but the
   spelling is nonstandard -- confirm against the ACLE document.  */
20928 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20929 vqabsb_s8 (int8x1_t __a)
20931 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
20934 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20935 vqabsh_s16 (int16x1_t __a)
20937 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
20940 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20941 vqabss_s32 (int32x1_t __a)
20943 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
20946 /* vqadd */
20948 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20949 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
20951 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
20954 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20955 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
20957 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
20960 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20961 vqadds_s32 (int32x1_t __a, int32x1_t __b)
20963 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
20966 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20967 vqaddd_s64 (int64x1_t __a, int64x1_t __b)
20969 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
20972 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20973 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
20975 return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
20978 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20979 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
20981 return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
20984 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20985 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
20987 return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
20990 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20991 vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
20993 return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
20996 /* vqdmlal */
20998 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20999 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21001 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
21004 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21005 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21007 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
21010 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21011 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21012 int const __d)
21014 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
21017 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21018 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21019 int const __d)
21021 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
21024 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21025 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21027 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
21030 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21031 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21033 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
21034 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
21037 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21038 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21040 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
21043 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21044 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21046 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
21049 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21050 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21052 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
21055 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21056 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21058 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
21061 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21062 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21063 int const __d)
21065 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
21068 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21069 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21070 int const __d)
21072 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
21075 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21076 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21078 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
21081 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21082 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21084 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
21085 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
21088 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21089 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21091 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
21094 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21095 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21097 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
21100 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21101 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21103 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
/* vqdmlalh_lane_s16: scalar saturating doubling multiply-accumulate
   long, lane form: __a + sat (2 * __b * __c[__d]).
   NOTE(review): ACLE defines the `_lane_` variant as taking a 64-bit
   int16x4_t lane vector; the 128-bit int16x8_t form belongs to
   `_laneq_`.  The int16x8_t parameter here looks like a conformance
   issue inherited from the builtin's signature -- confirm against the
   ACLE specification before relying on lane indices >= 4.  */
21106 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21107 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21109 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
21112 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21113 vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21115 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
/* vqdmlals_lane_s32: scalar saturating doubling multiply-accumulate
   long, lane form: __a + sat (2 * __b * __c[__d]).
   NOTE(review): ACLE defines the `_lane_` variant with an int32x2_t
   lane vector; int32x4_t belongs to `_laneq_`.  Possible conformance
   issue -- confirm against the ACLE specification.  */
21118 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21119 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21121 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
21124 /* vqdmlsl */
21126 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21127 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21129 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
21132 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21133 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21135 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
21138 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21139 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21140 int const __d)
21142 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
21145 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21146 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21147 int const __d)
21149 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
21152 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21153 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21155 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
21158 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21159 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21161 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
21162 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
21165 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21166 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21168 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
21171 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21172 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21174 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
21177 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21178 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21180 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
21183 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21184 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21186 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
21189 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21190 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21191 int const __d)
21193 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
21196 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21197 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21198 int const __d)
21200 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
21203 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21204 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21206 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
21209 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21210 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21212 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
21213 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
21216 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21217 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21219 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
21222 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21223 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21225 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
21228 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21229 vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21231 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
/* vqdmlslh_lane_s16: scalar saturating doubling multiply-subtract
   long, lane form: __a - sat (2 * __b * __c[__d]).
   NOTE(review): ACLE defines the `_lane_` variant with an int16x4_t
   lane vector; int16x8_t belongs to `_laneq_`.  Possible conformance
   issue -- confirm against the ACLE specification.  */
21234 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21235 vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21237 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
21240 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21241 vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21243 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
/* vqdmlsls_lane_s32: scalar saturating doubling multiply-subtract
   long, lane form: __a - sat (2 * __b * __c[__d]).
   NOTE(review): ACLE defines the `_lane_` variant with an int32x2_t
   lane vector; int32x4_t belongs to `_laneq_`.  Possible conformance
   issue -- confirm against the ACLE specification.  */
21246 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21247 vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21249 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
21252 /* vqdmulh */
21254 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21255 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21257 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
21260 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21261 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21263 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
21266 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21267 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21269 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
21272 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21273 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21275 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
21278 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21279 vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21281 return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
/* vqdmulhh_lane_s16: scalar saturating doubling multiply returning
   high half, against lane __c of __b.
   NOTE(review): ACLE defines the `_lane_` variant with an int16x4_t
   lane vector; int16x8_t belongs to `_laneq_`.  Possible conformance
   issue -- confirm against the ACLE specification.  */
21284 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21285 vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21287 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
21290 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21291 vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21293 return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
/* vqdmulhs_lane_s32: scalar saturating doubling multiply returning
   high half, against lane __c of __b.
   NOTE(review): ACLE defines the `_lane_` variant with an int32x2_t
   lane vector; int32x4_t belongs to `_laneq_`.  Possible conformance
   issue -- confirm against the ACLE specification.  */
21296 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21297 vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21299 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
21302 /* vqdmull */
21304 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21305 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
21307 return __builtin_aarch64_sqdmullv4hi (__a, __b);
21310 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21311 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
21313 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
21316 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21317 vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21319 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
21322 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21323 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21325 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
21328 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21329 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
21331 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
21334 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21335 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
21337 int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0)));
21338 return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
21341 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21342 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
21344 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
21347 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21348 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
21350 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
21353 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21354 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
21356 return __builtin_aarch64_sqdmullv2si (__a, __b);
21359 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21360 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
21362 return __builtin_aarch64_sqdmull2v4si (__a, __b);
21365 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21366 vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21368 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
21371 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21372 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21374 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
21377 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21378 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
21380 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
21383 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21384 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
21386 int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0)));
21387 return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
21390 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21391 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
21393 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
21396 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21397 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
21399 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
21402 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21403 vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
21405 return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
21408 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21409 vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21411 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
21414 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21415 vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
21417 return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
21420 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21421 vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21423 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
21426 /* vqmovn */
21428 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21429 vqmovn_s16 (int16x8_t __a)
21431 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
21434 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21435 vqmovn_s32 (int32x4_t __a)
21437 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
21440 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21441 vqmovn_s64 (int64x2_t __a)
21443 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
21446 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21447 vqmovn_u16 (uint16x8_t __a)
21449 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
21452 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21453 vqmovn_u32 (uint32x4_t __a)
21455 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
21458 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21459 vqmovn_u64 (uint64x2_t __a)
21461 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
21464 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21465 vqmovnh_s16 (int16x1_t __a)
21467 return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
21470 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21471 vqmovns_s32 (int32x1_t __a)
21473 return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
21476 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21477 vqmovnd_s64 (int64x1_t __a)
21479 return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
21482 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21483 vqmovnh_u16 (uint16x1_t __a)
21485 return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
21488 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21489 vqmovns_u32 (uint32x1_t __a)
21491 return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
21494 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21495 vqmovnd_u64 (uint64x1_t __a)
21497 return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
21500 /* vqmovun */
21502 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21503 vqmovun_s16 (int16x8_t __a)
21505 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
21508 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21509 vqmovun_s32 (int32x4_t __a)
21511 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
21514 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21515 vqmovun_s64 (int64x2_t __a)
21517 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
21520 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21521 vqmovunh_s16 (int16x1_t __a)
21523 return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
21526 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21527 vqmovuns_s32 (int32x1_t __a)
21529 return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
21532 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21533 vqmovund_s64 (int64x1_t __a)
21535 return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
21538 /* vqneg */
21540 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21541 vqnegq_s64 (int64x2_t __a)
21543 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
21546 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21547 vqnegb_s8 (int8x1_t __a)
21549 return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
21552 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21553 vqnegh_s16 (int16x1_t __a)
21555 return (int16x1_t) __builtin_aarch64_sqneghi (__a);
21558 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21559 vqnegs_s32 (int32x1_t __a)
21561 return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
21564 /* vqrdmulh */
21566 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21567 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21569 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
21572 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21573 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21575 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
21578 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21579 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21581 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
21584 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21585 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21587 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
21590 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21591 vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21593 return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
21596 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21597 vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21599 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
21602 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21603 vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21605 return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
21608 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21609 vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21611 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
21614 /* vqrshl */
21616 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21617 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
21619 return __builtin_aarch64_sqrshlv8qi (__a, __b);
21622 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21623 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
21625 return __builtin_aarch64_sqrshlv4hi (__a, __b);
21628 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21629 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
21631 return __builtin_aarch64_sqrshlv2si (__a, __b);
21634 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21635 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
21637 return __builtin_aarch64_sqrshldi (__a, __b);
21640 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21641 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
21643 return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
21646 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21647 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
21649 return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
21652 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21653 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
21655 return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
21658 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21659 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
21661 return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
21664 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21665 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
21667 return __builtin_aarch64_sqrshlv16qi (__a, __b);
21670 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21671 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
21673 return __builtin_aarch64_sqrshlv8hi (__a, __b);
21676 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21677 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
21679 return __builtin_aarch64_sqrshlv4si (__a, __b);
21682 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21683 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
21685 return __builtin_aarch64_sqrshlv2di (__a, __b);
21688 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21689 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
21691 return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
21694 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21695 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
21697 return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
21700 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21701 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
21703 return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
21706 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21707 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
21709 return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
21712 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21713 vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
21715 return __builtin_aarch64_sqrshlqi (__a, __b);
21718 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21719 vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
21721 return __builtin_aarch64_sqrshlhi (__a, __b);
21724 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21725 vqrshls_s32 (int32x1_t __a, int32x1_t __b)
21727 return __builtin_aarch64_sqrshlsi (__a, __b);
21730 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21731 vqrshld_s64 (int64x1_t __a, int64x1_t __b)
21733 return __builtin_aarch64_sqrshldi (__a, __b);
21736 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21737 vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
21739 return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
21742 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21743 vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
21745 return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
21748 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21749 vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
21751 return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
21754 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21755 vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
21757 return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
21760 /* vqrshrn */
21762 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21763 vqrshrn_n_s16 (int16x8_t __a, const int __b)
21765 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
21768 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21769 vqrshrn_n_s32 (int32x4_t __a, const int __b)
21771 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
21774 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21775 vqrshrn_n_s64 (int64x2_t __a, const int __b)
21777 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
21780 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21781 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
21783 return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
21786 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21787 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
21789 return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
21792 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21793 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
21795 return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
21798 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21799 vqrshrnh_n_s16 (int16x1_t __a, const int __b)
21801 return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
21804 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21805 vqrshrns_n_s32 (int32x1_t __a, const int __b)
21807 return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
21810 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21811 vqrshrnd_n_s64 (int64x1_t __a, const int __b)
21813 return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
21816 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21817 vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
21819 return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
21822 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21823 vqrshrns_n_u32 (uint32x1_t __a, const int __b)
21825 return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
21828 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21829 vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
21831 return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
21834 /* vqrshrun */
21836 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21837 vqrshrun_n_s16 (int16x8_t __a, const int __b)
21839 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
21842 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21843 vqrshrun_n_s32 (int32x4_t __a, const int __b)
21845 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
21848 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21849 vqrshrun_n_s64 (int64x2_t __a, const int __b)
21851 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
21854 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21855 vqrshrunh_n_s16 (int16x1_t __a, const int __b)
21857 return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
21860 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21861 vqrshruns_n_s32 (int32x1_t __a, const int __b)
21863 return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
21866 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21867 vqrshrund_n_s64 (int64x1_t __a, const int __b)
21869 return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
21872 /* vqshl */
21874 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21875 vqshl_s8 (int8x8_t __a, int8x8_t __b)
21877 return __builtin_aarch64_sqshlv8qi (__a, __b);
21880 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21881 vqshl_s16 (int16x4_t __a, int16x4_t __b)
21883 return __builtin_aarch64_sqshlv4hi (__a, __b);
21886 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21887 vqshl_s32 (int32x2_t __a, int32x2_t __b)
21889 return __builtin_aarch64_sqshlv2si (__a, __b);
21892 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21893 vqshl_s64 (int64x1_t __a, int64x1_t __b)
21895 return __builtin_aarch64_sqshldi (__a, __b);
21898 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21899 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
21901 return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
21904 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21905 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
21907 return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
21910 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21911 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
21913 return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
21916 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21917 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
21919 return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
21922 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21923 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
21925 return __builtin_aarch64_sqshlv16qi (__a, __b);
21928 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21929 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
21931 return __builtin_aarch64_sqshlv8hi (__a, __b);
21934 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21935 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
21937 return __builtin_aarch64_sqshlv4si (__a, __b);
21940 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21941 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
21943 return __builtin_aarch64_sqshlv2di (__a, __b);
21946 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21947 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
21949 return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
21952 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21953 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
21955 return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
21958 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21959 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
21961 return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
21964 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21965 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
21967 return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
21970 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21971 vqshlb_s8 (int8x1_t __a, int8x1_t __b)
21973 return __builtin_aarch64_sqshlqi (__a, __b);
21976 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21977 vqshlh_s16 (int16x1_t __a, int16x1_t __b)
21979 return __builtin_aarch64_sqshlhi (__a, __b);
21982 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21983 vqshls_s32 (int32x1_t __a, int32x1_t __b)
21985 return __builtin_aarch64_sqshlsi (__a, __b);
21988 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21989 vqshld_s64 (int64x1_t __a, int64x1_t __b)
21991 return __builtin_aarch64_sqshldi (__a, __b);
21994 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21995 vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
21997 return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
22000 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22001 vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22003 return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
22006 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22007 vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
22009 return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
22012 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22013 vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
22015 return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
22018 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22019 vqshl_n_s8 (int8x8_t __a, const int __b)
22021 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
22024 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22025 vqshl_n_s16 (int16x4_t __a, const int __b)
22027 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
22030 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22031 vqshl_n_s32 (int32x2_t __a, const int __b)
22033 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
22036 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22037 vqshl_n_s64 (int64x1_t __a, const int __b)
22039 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22042 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22043 vqshl_n_u8 (uint8x8_t __a, const int __b)
22045 return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
22048 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22049 vqshl_n_u16 (uint16x4_t __a, const int __b)
22051 return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
22054 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22055 vqshl_n_u32 (uint32x2_t __a, const int __b)
22057 return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
22060 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22061 vqshl_n_u64 (uint64x1_t __a, const int __b)
22063 return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
22066 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22067 vqshlq_n_s8 (int8x16_t __a, const int __b)
22069 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
22072 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22073 vqshlq_n_s16 (int16x8_t __a, const int __b)
22075 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
22078 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22079 vqshlq_n_s32 (int32x4_t __a, const int __b)
22081 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
22084 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22085 vqshlq_n_s64 (int64x2_t __a, const int __b)
22087 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
22090 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22091 vqshlq_n_u8 (uint8x16_t __a, const int __b)
22093 return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
22096 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22097 vqshlq_n_u16 (uint16x8_t __a, const int __b)
22099 return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
22102 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22103 vqshlq_n_u32 (uint32x4_t __a, const int __b)
22105 return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
22108 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22109 vqshlq_n_u64 (uint64x2_t __a, const int __b)
22111 return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
22114 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22115 vqshlb_n_s8 (int8x1_t __a, const int __b)
22117 return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
22120 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22121 vqshlh_n_s16 (int16x1_t __a, const int __b)
22123 return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
22126 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22127 vqshls_n_s32 (int32x1_t __a, const int __b)
22129 return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
22132 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22133 vqshld_n_s64 (int64x1_t __a, const int __b)
22135 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22138 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22139 vqshlb_n_u8 (uint8x1_t __a, const int __b)
22141 return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
22144 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22145 vqshlh_n_u16 (uint16x1_t __a, const int __b)
22147 return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
22150 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22151 vqshls_n_u32 (uint32x1_t __a, const int __b)
22153 return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
22156 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22157 vqshld_n_u64 (uint64x1_t __a, const int __b)
22159 return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
22162 /* vqshlu */
22164 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22165 vqshlu_n_s8 (int8x8_t __a, const int __b)
22167 return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
22170 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22171 vqshlu_n_s16 (int16x4_t __a, const int __b)
22173 return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
22176 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22177 vqshlu_n_s32 (int32x2_t __a, const int __b)
22179 return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
22182 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22183 vqshlu_n_s64 (int64x1_t __a, const int __b)
22185 return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22188 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22189 vqshluq_n_s8 (int8x16_t __a, const int __b)
22191 return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
22194 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22195 vqshluq_n_s16 (int16x8_t __a, const int __b)
22197 return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
22200 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22201 vqshluq_n_s32 (int32x4_t __a, const int __b)
22203 return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
22206 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22207 vqshluq_n_s64 (int64x2_t __a, const int __b)
22209 return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
22212 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22213 vqshlub_n_s8 (int8x1_t __a, const int __b)
22215 return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
22218 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22219 vqshluh_n_s16 (int16x1_t __a, const int __b)
22221 return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
22224 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22225 vqshlus_n_s32 (int32x1_t __a, const int __b)
22227 return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
22230 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22231 vqshlud_n_s64 (int64x1_t __a, const int __b)
22233 return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22236 /* vqshrn */
22238 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22239 vqshrn_n_s16 (int16x8_t __a, const int __b)
22241 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
22244 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22245 vqshrn_n_s32 (int32x4_t __a, const int __b)
22247 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
22250 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22251 vqshrn_n_s64 (int64x2_t __a, const int __b)
22253 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
22256 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22257 vqshrn_n_u16 (uint16x8_t __a, const int __b)
22259 return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
22262 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22263 vqshrn_n_u32 (uint32x4_t __a, const int __b)
22265 return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
22268 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22269 vqshrn_n_u64 (uint64x2_t __a, const int __b)
22271 return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
22274 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22275 vqshrnh_n_s16 (int16x1_t __a, const int __b)
22277 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
22280 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22281 vqshrns_n_s32 (int32x1_t __a, const int __b)
22283 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
22286 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22287 vqshrnd_n_s64 (int64x1_t __a, const int __b)
22289 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
22292 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22293 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
22295 return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
22298 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22299 vqshrns_n_u32 (uint32x1_t __a, const int __b)
22301 return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
22304 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22305 vqshrnd_n_u64 (uint64x1_t __a, const int __b)
22307 return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
22310 /* vqshrun */
22312 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22313 vqshrun_n_s16 (int16x8_t __a, const int __b)
22315 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
22318 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22319 vqshrun_n_s32 (int32x4_t __a, const int __b)
22321 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
22324 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22325 vqshrun_n_s64 (int64x2_t __a, const int __b)
22327 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
22330 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22331 vqshrunh_n_s16 (int16x1_t __a, const int __b)
22333 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
22336 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22337 vqshruns_n_s32 (int32x1_t __a, const int __b)
22339 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
22342 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22343 vqshrund_n_s64 (int64x1_t __a, const int __b)
22345 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
22348 /* vqsub */
22350 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22351 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
22353 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
22356 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22357 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
22359 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
22362 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22363 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
22365 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
22368 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22369 vqsubd_s64 (int64x1_t __a, int64x1_t __b)
22371 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
22374 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22375 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
22377 return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
22380 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22381 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
22383 return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
22386 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22387 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
22389 return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
22392 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22393 vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
22395 return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
22398 /* vrecpe */
22400 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22401 vrecpes_f32 (float32_t __a)
22403 return __builtin_aarch64_frecpesf (__a);
22406 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22407 vrecped_f64 (float64_t __a)
22409 return __builtin_aarch64_frecpedf (__a);
22412 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22413 vrecpe_f32 (float32x2_t __a)
22415 return __builtin_aarch64_frecpev2sf (__a);
22418 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22419 vrecpeq_f32 (float32x4_t __a)
22421 return __builtin_aarch64_frecpev4sf (__a);
22424 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22425 vrecpeq_f64 (float64x2_t __a)
22427 return __builtin_aarch64_frecpev2df (__a);
22430 /* vrecps */
22432 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22433 vrecpss_f32 (float32_t __a, float32_t __b)
22435 return __builtin_aarch64_frecpssf (__a, __b);
22438 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22439 vrecpsd_f64 (float64_t __a, float64_t __b)
22441 return __builtin_aarch64_frecpsdf (__a, __b);
22444 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22445 vrecps_f32 (float32x2_t __a, float32x2_t __b)
22447 return __builtin_aarch64_frecpsv2sf (__a, __b);
22450 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22451 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
22453 return __builtin_aarch64_frecpsv4sf (__a, __b);
22456 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22457 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
22459 return __builtin_aarch64_frecpsv2df (__a, __b);
22462 /* vrecpx */
22464 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22465 vrecpxs_f32 (float32_t __a)
22467 return __builtin_aarch64_frecpxsf (__a);
22470 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22471 vrecpxd_f64 (float64_t __a)
22473 return __builtin_aarch64_frecpxdf (__a);
22476 /* vrnd */
22478 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22479 vrnd_f32 (float32x2_t __a)
22481 return __builtin_aarch64_btruncv2sf (__a);
22484 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22485 vrndq_f32 (float32x4_t __a)
22487 return __builtin_aarch64_btruncv4sf (__a);
22490 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22491 vrndq_f64 (float64x2_t __a)
22493 return __builtin_aarch64_btruncv2df (__a);
22496 /* vrnda */
22498 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22499 vrnda_f32 (float32x2_t __a)
22501 return __builtin_aarch64_roundv2sf (__a);
22504 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22505 vrndaq_f32 (float32x4_t __a)
22507 return __builtin_aarch64_roundv4sf (__a);
22510 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22511 vrndaq_f64 (float64x2_t __a)
22513 return __builtin_aarch64_roundv2df (__a);
22516 /* vrndi */
22518 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22519 vrndi_f32 (float32x2_t __a)
22521 return __builtin_aarch64_nearbyintv2sf (__a);
22524 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22525 vrndiq_f32 (float32x4_t __a)
22527 return __builtin_aarch64_nearbyintv4sf (__a);
22530 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22531 vrndiq_f64 (float64x2_t __a)
22533 return __builtin_aarch64_nearbyintv2df (__a);
22536 /* vrndm */
22538 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22539 vrndm_f32 (float32x2_t __a)
22541 return __builtin_aarch64_floorv2sf (__a);
22544 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22545 vrndmq_f32 (float32x4_t __a)
22547 return __builtin_aarch64_floorv4sf (__a);
22550 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22551 vrndmq_f64 (float64x2_t __a)
22553 return __builtin_aarch64_floorv2df (__a);
22556 /* vrndn */
22558 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22559 vrndn_f32 (float32x2_t __a)
22561 return __builtin_aarch64_frintnv2sf (__a);
22563 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22564 vrndnq_f32 (float32x4_t __a)
22566 return __builtin_aarch64_frintnv4sf (__a);
22569 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22570 vrndnq_f64 (float64x2_t __a)
22572 return __builtin_aarch64_frintnv2df (__a);
22575 /* vrndp */
22577 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22578 vrndp_f32 (float32x2_t __a)
22580 return __builtin_aarch64_ceilv2sf (__a);
22583 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22584 vrndpq_f32 (float32x4_t __a)
22586 return __builtin_aarch64_ceilv4sf (__a);
22589 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22590 vrndpq_f64 (float64x2_t __a)
22592 return __builtin_aarch64_ceilv2df (__a);
22595 /* vrndx */
22597 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22598 vrndx_f32 (float32x2_t __a)
22600 return __builtin_aarch64_rintv2sf (__a);
22603 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22604 vrndxq_f32 (float32x4_t __a)
22606 return __builtin_aarch64_rintv4sf (__a);
22609 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22610 vrndxq_f64 (float64x2_t __a)
22612 return __builtin_aarch64_rintv2df (__a);
22615 /* vrshl */
22617 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22618 vrshl_s8 (int8x8_t __a, int8x8_t __b)
22620 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
22623 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22624 vrshl_s16 (int16x4_t __a, int16x4_t __b)
22626 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
22629 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22630 vrshl_s32 (int32x2_t __a, int32x2_t __b)
22632 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
22635 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22636 vrshl_s64 (int64x1_t __a, int64x1_t __b)
22638 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
22641 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22642 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
22644 return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
22647 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22648 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
22650 return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
22653 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22654 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
22656 return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
22659 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22660 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
22662 return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
22665 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22666 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
22668 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
22671 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22672 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
22674 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
22677 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22678 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
22680 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
22683 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22684 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
22686 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
22689 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22690 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
22692 return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
22695 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22696 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
22698 return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
22701 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22702 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
22704 return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
22707 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22708 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
22710 return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
22713 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22714 vrshld_s64 (int64x1_t __a, int64x1_t __b)
22716 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
22719 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22720 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
22722 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
22725 /* vrshr */
22727 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22728 vrshr_n_s8 (int8x8_t __a, const int __b)
22730 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
22733 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22734 vrshr_n_s16 (int16x4_t __a, const int __b)
22736 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
22739 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22740 vrshr_n_s32 (int32x2_t __a, const int __b)
22742 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
22745 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22746 vrshr_n_s64 (int64x1_t __a, const int __b)
22748 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
22751 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22752 vrshr_n_u8 (uint8x8_t __a, const int __b)
22754 return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
22757 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22758 vrshr_n_u16 (uint16x4_t __a, const int __b)
22760 return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
22763 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22764 vrshr_n_u32 (uint32x2_t __a, const int __b)
22766 return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
22769 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22770 vrshr_n_u64 (uint64x1_t __a, const int __b)
22772 return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
22775 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22776 vrshrq_n_s8 (int8x16_t __a, const int __b)
22778 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
22781 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22782 vrshrq_n_s16 (int16x8_t __a, const int __b)
22784 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
22787 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22788 vrshrq_n_s32 (int32x4_t __a, const int __b)
22790 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
22793 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22794 vrshrq_n_s64 (int64x2_t __a, const int __b)
22796 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
22799 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22800 vrshrq_n_u8 (uint8x16_t __a, const int __b)
22802 return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
22805 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22806 vrshrq_n_u16 (uint16x8_t __a, const int __b)
22808 return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
22811 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22812 vrshrq_n_u32 (uint32x4_t __a, const int __b)
22814 return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
22817 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22818 vrshrq_n_u64 (uint64x2_t __a, const int __b)
22820 return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
22823 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22824 vrshrd_n_s64 (int64x1_t __a, const int __b)
22826 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
22829 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22830 vrshrd_n_u64 (uint64x1_t __a, const int __b)
22832 return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
22835 /* vrsra */
22837 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22838 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22840 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
22843 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22844 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22846 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
22849 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22850 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22852 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
22855 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22856 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22858 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
22861 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22862 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22864 return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
22865 (int8x8_t) __b, __c);
22868 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22869 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22871 return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
22872 (int16x4_t) __b, __c);
22875 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22876 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22878 return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
22879 (int32x2_t) __b, __c);
22882 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22883 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22885 return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
22886 (int64x1_t) __b, __c);
22889 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22890 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22892 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
22895 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22896 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22898 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
22901 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22902 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22904 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
22907 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22908 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22910 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
22913 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22914 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22916 return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
22917 (int8x16_t) __b, __c);
22920 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22921 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22923 return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
22924 (int16x8_t) __b, __c);
22927 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22928 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22930 return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
22931 (int32x4_t) __b, __c);
22934 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22935 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22937 return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
22938 (int64x2_t) __b, __c);
22941 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22942 vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22944 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
22947 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22948 vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22950 return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* vsha1: SHA-1 hash update and schedule intrinsics.  Only available
   when the target provides the Crypto extension.  */

static __inline uint32x4_t
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32_t
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

static __inline uint32x4_t
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

static __inline uint32x4_t
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

/* SHA-256 hash update and schedule intrinsics.  */

static __inline uint32x4_t
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

static __inline uint32x4_t
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

static __inline uint32x4_t
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

static __inline uint32x4_t
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

/* Polynomial (carry-less) 64x64 -> 128 bit multiply (PMULL/PMULL2).  */

static __inline poly128_t
vmull_p64 (poly64_t a, poly64_t b)
{
  return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

static __inline poly128_t
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#endif
23030 /* vshl */
23032 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23033 vshl_n_s8 (int8x8_t __a, const int __b)
23035 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
23038 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23039 vshl_n_s16 (int16x4_t __a, const int __b)
23041 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
23044 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23045 vshl_n_s32 (int32x2_t __a, const int __b)
23047 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
23050 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23051 vshl_n_s64 (int64x1_t __a, const int __b)
23053 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23056 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23057 vshl_n_u8 (uint8x8_t __a, const int __b)
23059 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
23062 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23063 vshl_n_u16 (uint16x4_t __a, const int __b)
23065 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
23068 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23069 vshl_n_u32 (uint32x2_t __a, const int __b)
23071 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
23074 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23075 vshl_n_u64 (uint64x1_t __a, const int __b)
23077 return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
23080 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23081 vshlq_n_s8 (int8x16_t __a, const int __b)
23083 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
23086 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23087 vshlq_n_s16 (int16x8_t __a, const int __b)
23089 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
23092 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23093 vshlq_n_s32 (int32x4_t __a, const int __b)
23095 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
23098 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23099 vshlq_n_s64 (int64x2_t __a, const int __b)
23101 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
23104 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23105 vshlq_n_u8 (uint8x16_t __a, const int __b)
23107 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
23110 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23111 vshlq_n_u16 (uint16x8_t __a, const int __b)
23113 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
23116 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23117 vshlq_n_u32 (uint32x4_t __a, const int __b)
23119 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
23122 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23123 vshlq_n_u64 (uint64x2_t __a, const int __b)
23125 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
23128 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23129 vshld_n_s64 (int64x1_t __a, const int __b)
23131 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23134 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23135 vshld_n_u64 (uint64x1_t __a, const int __b)
23137 return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
23140 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23141 vshl_s8 (int8x8_t __a, int8x8_t __b)
23143 return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
23146 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23147 vshl_s16 (int16x4_t __a, int16x4_t __b)
23149 return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
23152 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23153 vshl_s32 (int32x2_t __a, int32x2_t __b)
23155 return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
23158 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23159 vshl_s64 (int64x1_t __a, int64x1_t __b)
23161 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23164 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23165 vshl_u8 (uint8x8_t __a, int8x8_t __b)
23167 return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
23170 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23171 vshl_u16 (uint16x4_t __a, int16x4_t __b)
23173 return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
23176 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23177 vshl_u32 (uint32x2_t __a, int32x2_t __b)
23179 return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
23182 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23183 vshl_u64 (uint64x1_t __a, int64x1_t __b)
23185 return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
23188 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23189 vshlq_s8 (int8x16_t __a, int8x16_t __b)
23191 return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
23194 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23195 vshlq_s16 (int16x8_t __a, int16x8_t __b)
23197 return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
23200 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23201 vshlq_s32 (int32x4_t __a, int32x4_t __b)
23203 return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
23206 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23207 vshlq_s64 (int64x2_t __a, int64x2_t __b)
23209 return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
23212 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23213 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
23215 return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
23218 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23219 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
23221 return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
23224 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23225 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
23227 return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
23230 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23231 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
23233 return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
23236 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23237 vshld_s64 (int64x1_t __a, int64x1_t __b)
23239 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23242 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23243 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
23245 return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
23248 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23249 vshll_high_n_s8 (int8x16_t __a, const int __b)
23251 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
23254 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23255 vshll_high_n_s16 (int16x8_t __a, const int __b)
23257 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
23260 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23261 vshll_high_n_s32 (int32x4_t __a, const int __b)
23263 return __builtin_aarch64_sshll2_nv4si (__a, __b);
23266 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23267 vshll_high_n_u8 (uint8x16_t __a, const int __b)
23269 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
23272 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23273 vshll_high_n_u16 (uint16x8_t __a, const int __b)
23275 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
23278 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23279 vshll_high_n_u32 (uint32x4_t __a, const int __b)
23281 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
23284 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23285 vshll_n_s8 (int8x8_t __a, const int __b)
23287 return __builtin_aarch64_sshll_nv8qi (__a, __b);
23290 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23291 vshll_n_s16 (int16x4_t __a, const int __b)
23293 return __builtin_aarch64_sshll_nv4hi (__a, __b);
23296 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23297 vshll_n_s32 (int32x2_t __a, const int __b)
23299 return __builtin_aarch64_sshll_nv2si (__a, __b);
23302 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23303 vshll_n_u8 (uint8x8_t __a, const int __b)
23305 return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
23308 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23309 vshll_n_u16 (uint16x4_t __a, const int __b)
23311 return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
23314 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23315 vshll_n_u32 (uint32x2_t __a, const int __b)
23317 return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
/* vshr */

/* Shift right by immediate.  Signed forms expand to an arithmetic shift
   (ashr builtins), unsigned forms to a logical shift (lshr builtins).
   The vshrd_* forms are the scalar 64-bit ("d" register) variants.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshr_n_u64 (uint64x1_t __a, const int __b)
{
  /* The _uus builtin takes and returns unsigned operands directly, so no
     casts are needed here (unlike the other unsigned variants).  */
  return __builtin_aarch64_lshr_simddi_uus ( __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshrd_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_lshr_simddi_uus (__a, __b);
}
/* vsli */

/* Shift left and insert (SLI): __b shifted left by __c bits is inserted
   into __a.  Unsigned variants route through the signed builtin signature,
   hence the casts.  vslid_* are the scalar 64-bit forms.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
						   (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
						    (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
						    (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
						  (int64x1_t) __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
						     (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
						    (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
						    (int32x4_t) __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
						    (int64x2_t) __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
}
/* vsqadd */

/* Unsigned saturating accumulate of a signed value (USQADD): the signed
   __b is added to the unsigned __a with saturation.  The vsqaddb/h/s/d
   forms are the scalar byte/half/word/doubleword variants.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
						    (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
						    (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
}
23629 /* vsqrt */
23630 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23631 vsqrt_f32 (float32x2_t a)
23633 return __builtin_aarch64_sqrtv2sf (a);
23636 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23637 vsqrtq_f32 (float32x4_t a)
23639 return __builtin_aarch64_sqrtv4sf (a);
23642 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23643 vsqrtq_f64 (float64x2_t a)
23645 return __builtin_aarch64_sqrtv2df (a);
/* vsra */

/* Shift right and accumulate (SSRA/USRA): __b shifted right by __c bits is
   added to __a.  Signed forms use an arithmetic shift, unsigned forms a
   logical shift.  vsrad_* are the scalar 64-bit forms.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
						   (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
						    (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
						    (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
						  (int64x1_t) __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
						     (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
						    (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
						    (int32x4_t) __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
						    (int64x2_t) __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
}
/* vsri */

/* Shift right and insert (SRI): __b shifted right by __c bits is inserted
   into __a.  vsrid_* are the scalar 64-bit forms.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
						   (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
						    (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
						    (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
						  (int64x1_t) __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
						     (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
						    (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
						    (int32x4_t) __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
						    (int64x2_t) __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
}
23884 /* vst1 */
23886 __extension__ static __inline void __attribute__ ((__always_inline__))
23887 vst1_f32 (float32_t *a, float32x2_t b)
23889 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
23892 __extension__ static __inline void __attribute__ ((__always_inline__))
23893 vst1_f64 (float64_t *a, float64x1_t b)
23895 *a = b;
23898 __extension__ static __inline void __attribute__ ((__always_inline__))
23899 vst1_p8 (poly8_t *a, poly8x8_t b)
23901 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23902 (int8x8_t) b);
23905 __extension__ static __inline void __attribute__ ((__always_inline__))
23906 vst1_p16 (poly16_t *a, poly16x4_t b)
23908 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23909 (int16x4_t) b);
23912 __extension__ static __inline void __attribute__ ((__always_inline__))
23913 vst1_s8 (int8_t *a, int8x8_t b)
23915 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
23918 __extension__ static __inline void __attribute__ ((__always_inline__))
23919 vst1_s16 (int16_t *a, int16x4_t b)
23921 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
23924 __extension__ static __inline void __attribute__ ((__always_inline__))
23925 vst1_s32 (int32_t *a, int32x2_t b)
23927 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
23930 __extension__ static __inline void __attribute__ ((__always_inline__))
23931 vst1_s64 (int64_t *a, int64x1_t b)
23933 *a = b;
23936 __extension__ static __inline void __attribute__ ((__always_inline__))
23937 vst1_u8 (uint8_t *a, uint8x8_t b)
23939 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23940 (int8x8_t) b);
23943 __extension__ static __inline void __attribute__ ((__always_inline__))
23944 vst1_u16 (uint16_t *a, uint16x4_t b)
23946 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23947 (int16x4_t) b);
23950 __extension__ static __inline void __attribute__ ((__always_inline__))
23951 vst1_u32 (uint32_t *a, uint32x2_t b)
23953 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
23954 (int32x2_t) b);
23957 __extension__ static __inline void __attribute__ ((__always_inline__))
23958 vst1_u64 (uint64_t *a, uint64x1_t b)
23960 *a = b;
23963 __extension__ static __inline void __attribute__ ((__always_inline__))
23964 vst1q_f32 (float32_t *a, float32x4_t b)
23966 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
23969 __extension__ static __inline void __attribute__ ((__always_inline__))
23970 vst1q_f64 (float64_t *a, float64x2_t b)
23972 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
23975 /* vst1q */
23977 __extension__ static __inline void __attribute__ ((__always_inline__))
23978 vst1q_p8 (poly8_t *a, poly8x16_t b)
23980 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
23981 (int8x16_t) b);
23984 __extension__ static __inline void __attribute__ ((__always_inline__))
23985 vst1q_p16 (poly16_t *a, poly16x8_t b)
23987 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
23988 (int16x8_t) b);
23991 __extension__ static __inline void __attribute__ ((__always_inline__))
23992 vst1q_s8 (int8_t *a, int8x16_t b)
23994 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
23997 __extension__ static __inline void __attribute__ ((__always_inline__))
23998 vst1q_s16 (int16_t *a, int16x8_t b)
24000 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
24003 __extension__ static __inline void __attribute__ ((__always_inline__))
24004 vst1q_s32 (int32_t *a, int32x4_t b)
24006 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
24009 __extension__ static __inline void __attribute__ ((__always_inline__))
24010 vst1q_s64 (int64_t *a, int64x2_t b)
24012 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
24015 __extension__ static __inline void __attribute__ ((__always_inline__))
24016 vst1q_u8 (uint8_t *a, uint8x16_t b)
24018 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24019 (int8x16_t) b);
24022 __extension__ static __inline void __attribute__ ((__always_inline__))
24023 vst1q_u16 (uint16_t *a, uint16x8_t b)
24025 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24026 (int16x8_t) b);
24029 __extension__ static __inline void __attribute__ ((__always_inline__))
24030 vst1q_u32 (uint32_t *a, uint32x4_t b)
24032 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
24033 (int32x4_t) b);
24036 __extension__ static __inline void __attribute__ ((__always_inline__))
24037 vst1q_u64 (uint64_t *a, uint64x2_t b)
24039 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
24040 (int64x2_t) b);
24043 /* vstn */
24045 __extension__ static __inline void
24046 vst2_s64 (int64_t * __a, int64x1x2_t val)
24048 __builtin_aarch64_simd_oi __o;
24049 int64x2x2_t temp;
24050 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24051 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24052 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24053 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24054 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24057 __extension__ static __inline void
24058 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
24060 __builtin_aarch64_simd_oi __o;
24061 uint64x2x2_t temp;
24062 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24063 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24064 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24065 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24066 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24069 __extension__ static __inline void
24070 vst2_f64 (float64_t * __a, float64x1x2_t val)
24072 __builtin_aarch64_simd_oi __o;
24073 float64x2x2_t temp;
24074 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24075 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24076 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24077 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24078 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24081 __extension__ static __inline void
24082 vst2_s8 (int8_t * __a, int8x8x2_t val)
24084 __builtin_aarch64_simd_oi __o;
24085 int8x16x2_t temp;
24086 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24087 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24088 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24089 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24090 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24093 __extension__ static __inline void __attribute__ ((__always_inline__))
24094 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24096 __builtin_aarch64_simd_oi __o;
24097 poly8x16x2_t temp;
24098 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24099 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24100 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24101 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24102 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24105 __extension__ static __inline void __attribute__ ((__always_inline__))
24106 vst2_s16 (int16_t * __a, int16x4x2_t val)
24108 __builtin_aarch64_simd_oi __o;
24109 int16x8x2_t temp;
24110 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24111 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24112 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24113 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24114 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24117 __extension__ static __inline void __attribute__ ((__always_inline__))
24118 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24120 __builtin_aarch64_simd_oi __o;
24121 poly16x8x2_t temp;
24122 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24123 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24124 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24125 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24126 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24129 __extension__ static __inline void __attribute__ ((__always_inline__))
24130 vst2_s32 (int32_t * __a, int32x2x2_t val)
24132 __builtin_aarch64_simd_oi __o;
24133 int32x4x2_t temp;
24134 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24135 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24136 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24137 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24138 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24141 __extension__ static __inline void __attribute__ ((__always_inline__))
24142 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24144 __builtin_aarch64_simd_oi __o;
24145 uint8x16x2_t temp;
24146 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24147 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24148 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24149 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24150 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24153 __extension__ static __inline void __attribute__ ((__always_inline__))
24154 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24156 __builtin_aarch64_simd_oi __o;
24157 uint16x8x2_t temp;
24158 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24159 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24160 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24161 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24162 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24165 __extension__ static __inline void __attribute__ ((__always_inline__))
24166 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24168 __builtin_aarch64_simd_oi __o;
24169 uint32x4x2_t temp;
24170 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24171 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24172 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24173 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24174 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24177 __extension__ static __inline void __attribute__ ((__always_inline__))
24178 vst2_f32 (float32_t * __a, float32x2x2_t val)
24180 __builtin_aarch64_simd_oi __o;
24181 float32x4x2_t temp;
24182 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24183 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24184 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24185 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24186 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24189 __extension__ static __inline void __attribute__ ((__always_inline__))
24190 vst2q_s8 (int8_t * __a, int8x16x2_t val)
24192 __builtin_aarch64_simd_oi __o;
24193 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24194 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24195 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24198 __extension__ static __inline void __attribute__ ((__always_inline__))
24199 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24201 __builtin_aarch64_simd_oi __o;
24202 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24203 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24204 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24207 __extension__ static __inline void __attribute__ ((__always_inline__))
24208 vst2q_s16 (int16_t * __a, int16x8x2_t val)
24210 __builtin_aarch64_simd_oi __o;
24211 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24212 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24213 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24216 __extension__ static __inline void __attribute__ ((__always_inline__))
24217 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24219 __builtin_aarch64_simd_oi __o;
24220 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24221 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24222 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24225 __extension__ static __inline void __attribute__ ((__always_inline__))
24226 vst2q_s32 (int32_t * __a, int32x4x2_t val)
24228 __builtin_aarch64_simd_oi __o;
24229 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24230 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24231 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24234 __extension__ static __inline void __attribute__ ((__always_inline__))
24235 vst2q_s64 (int64_t * __a, int64x2x2_t val)
24237 __builtin_aarch64_simd_oi __o;
24238 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24239 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24240 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24243 __extension__ static __inline void __attribute__ ((__always_inline__))
24244 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24246 __builtin_aarch64_simd_oi __o;
24247 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24248 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24249 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24252 __extension__ static __inline void __attribute__ ((__always_inline__))
24253 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24255 __builtin_aarch64_simd_oi __o;
24256 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24257 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24258 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24261 __extension__ static __inline void __attribute__ ((__always_inline__))
24262 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24264 __builtin_aarch64_simd_oi __o;
24265 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24266 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24267 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24270 __extension__ static __inline void __attribute__ ((__always_inline__))
24271 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24273 __builtin_aarch64_simd_oi __o;
24274 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24275 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24276 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24279 __extension__ static __inline void __attribute__ ((__always_inline__))
24280 vst2q_f32 (float32_t * __a, float32x4x2_t val)
24282 __builtin_aarch64_simd_oi __o;
24283 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24284 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24285 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24288 __extension__ static __inline void __attribute__ ((__always_inline__))
24289 vst2q_f64 (float64_t * __a, float64x2x2_t val)
24291 __builtin_aarch64_simd_oi __o;
24292 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24293 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24294 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24297 __extension__ static __inline void
24298 vst3_s64 (int64_t * __a, int64x1x3_t val)
24300 __builtin_aarch64_simd_ci __o;
24301 int64x2x3_t temp;
24302 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24303 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24304 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24305 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24306 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24307 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24308 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24311 __extension__ static __inline void
24312 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24314 __builtin_aarch64_simd_ci __o;
24315 uint64x2x3_t temp;
24316 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24317 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24318 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24319 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24320 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24321 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24322 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24325 __extension__ static __inline void
24326 vst3_f64 (float64_t * __a, float64x1x3_t val)
24328 __builtin_aarch64_simd_ci __o;
24329 float64x2x3_t temp;
24330 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24331 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24332 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24333 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24334 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24335 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24336 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24339 __extension__ static __inline void
24340 vst3_s8 (int8_t * __a, int8x8x3_t val)
24342 __builtin_aarch64_simd_ci __o;
24343 int8x16x3_t temp;
24344 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24345 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24346 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24347 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24348 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24349 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24350 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24353 __extension__ static __inline void __attribute__ ((__always_inline__))
24354 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
24356 __builtin_aarch64_simd_ci __o;
24357 poly8x16x3_t temp;
24358 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24359 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24360 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24361 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24362 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24363 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24364 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24367 __extension__ static __inline void __attribute__ ((__always_inline__))
24368 vst3_s16 (int16_t * __a, int16x4x3_t val)
24370 __builtin_aarch64_simd_ci __o;
24371 int16x8x3_t temp;
24372 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24373 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24374 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24375 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24376 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24377 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24378 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24381 __extension__ static __inline void __attribute__ ((__always_inline__))
24382 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
24384 __builtin_aarch64_simd_ci __o;
24385 poly16x8x3_t temp;
24386 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24387 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24388 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24389 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24390 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24391 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24392 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24395 __extension__ static __inline void __attribute__ ((__always_inline__))
24396 vst3_s32 (int32_t * __a, int32x2x3_t val)
24398 __builtin_aarch64_simd_ci __o;
24399 int32x4x3_t temp;
24400 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24401 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24402 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24403 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24404 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24405 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24406 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24409 __extension__ static __inline void __attribute__ ((__always_inline__))
24410 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
24412 __builtin_aarch64_simd_ci __o;
24413 uint8x16x3_t temp;
24414 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24415 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24416 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24417 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24418 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24419 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24420 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24423 __extension__ static __inline void __attribute__ ((__always_inline__))
24424 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
24426 __builtin_aarch64_simd_ci __o;
24427 uint16x8x3_t temp;
24428 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24429 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24430 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24431 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24432 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24433 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24434 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24437 __extension__ static __inline void __attribute__ ((__always_inline__))
24438 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
24440 __builtin_aarch64_simd_ci __o;
24441 uint32x4x3_t temp;
24442 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24443 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24444 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24445 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24446 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24447 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24448 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24451 __extension__ static __inline void __attribute__ ((__always_inline__))
24452 vst3_f32 (float32_t * __a, float32x2x3_t val)
24454 __builtin_aarch64_simd_ci __o;
24455 float32x4x3_t temp;
24456 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24457 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24458 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24459 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
24460 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
24461 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
24462 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24465 __extension__ static __inline void __attribute__ ((__always_inline__))
24466 vst3q_s8 (int8_t * __a, int8x16x3_t val)
24468 __builtin_aarch64_simd_ci __o;
24469 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24470 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24471 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24472 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24475 __extension__ static __inline void __attribute__ ((__always_inline__))
24476 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
24478 __builtin_aarch64_simd_ci __o;
24479 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24480 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24481 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24482 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24485 __extension__ static __inline void __attribute__ ((__always_inline__))
24486 vst3q_s16 (int16_t * __a, int16x8x3_t val)
24488 __builtin_aarch64_simd_ci __o;
24489 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24490 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24491 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24492 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24495 __extension__ static __inline void __attribute__ ((__always_inline__))
24496 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
24498 __builtin_aarch64_simd_ci __o;
24499 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24500 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24501 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24502 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24505 __extension__ static __inline void __attribute__ ((__always_inline__))
24506 vst3q_s32 (int32_t * __a, int32x4x3_t val)
24508 __builtin_aarch64_simd_ci __o;
24509 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24510 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24511 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24512 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24515 __extension__ static __inline void __attribute__ ((__always_inline__))
24516 vst3q_s64 (int64_t * __a, int64x2x3_t val)
24518 __builtin_aarch64_simd_ci __o;
24519 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24520 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24521 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24522 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24525 __extension__ static __inline void __attribute__ ((__always_inline__))
24526 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
24528 __builtin_aarch64_simd_ci __o;
24529 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24530 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24531 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24532 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24535 __extension__ static __inline void __attribute__ ((__always_inline__))
24536 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
24538 __builtin_aarch64_simd_ci __o;
24539 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24540 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24541 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24542 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24545 __extension__ static __inline void __attribute__ ((__always_inline__))
24546 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
24548 __builtin_aarch64_simd_ci __o;
24549 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24550 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24551 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24552 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24555 __extension__ static __inline void __attribute__ ((__always_inline__))
24556 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
24558 __builtin_aarch64_simd_ci __o;
24559 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24560 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24561 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24562 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24565 __extension__ static __inline void __attribute__ ((__always_inline__))
24566 vst3q_f32 (float32_t * __a, float32x4x3_t val)
24568 __builtin_aarch64_simd_ci __o;
24569 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
24570 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
24571 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
24572 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24575 __extension__ static __inline void __attribute__ ((__always_inline__))
24576 vst3q_f64 (float64_t * __a, float64x2x3_t val)
24578 __builtin_aarch64_simd_ci __o;
24579 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
24580 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
24581 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
24582 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
24585 __extension__ static __inline void
24586 vst4_s64 (int64_t * __a, int64x1x4_t val)
24588 __builtin_aarch64_simd_xi __o;
24589 int64x2x4_t temp;
24590 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24591 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24592 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24593 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
24594 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24595 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24596 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24597 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24598 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24601 __extension__ static __inline void
24602 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24604 __builtin_aarch64_simd_xi __o;
24605 uint64x2x4_t temp;
24606 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24607 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24608 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24609 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
24610 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24611 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24612 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24613 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24614 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24617 __extension__ static __inline void
24618 vst4_f64 (float64_t * __a, float64x1x4_t val)
24620 __builtin_aarch64_simd_xi __o;
24621 float64x2x4_t temp;
24622 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24623 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24624 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24625 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
24626 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24627 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24628 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24629 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24630 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24633 __extension__ static __inline void
24634 vst4_s8 (int8_t * __a, int8x8x4_t val)
24636 __builtin_aarch64_simd_xi __o;
24637 int8x16x4_t temp;
24638 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24639 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24640 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24641 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
24642 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24643 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24644 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24645 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24646 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24649 __extension__ static __inline void __attribute__ ((__always_inline__))
24650 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
24652 __builtin_aarch64_simd_xi __o;
24653 poly8x16x4_t temp;
24654 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24655 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24656 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24657 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
24658 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24659 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24660 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24661 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24662 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24665 __extension__ static __inline void __attribute__ ((__always_inline__))
24666 vst4_s16 (int16_t * __a, int16x4x4_t val)
24668 __builtin_aarch64_simd_xi __o;
24669 int16x8x4_t temp;
24670 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24671 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24672 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24673 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
24674 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24675 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24676 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24677 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24678 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24681 __extension__ static __inline void __attribute__ ((__always_inline__))
24682 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
24684 __builtin_aarch64_simd_xi __o;
24685 poly16x8x4_t temp;
24686 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24687 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24688 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24689 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
24690 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24691 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24692 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24693 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24694 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24697 __extension__ static __inline void __attribute__ ((__always_inline__))
24698 vst4_s32 (int32_t * __a, int32x2x4_t val)
24700 __builtin_aarch64_simd_xi __o;
24701 int32x4x4_t temp;
24702 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24703 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24704 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24705 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
24706 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24707 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24708 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24709 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24710 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24713 __extension__ static __inline void __attribute__ ((__always_inline__))
24714 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
24716 __builtin_aarch64_simd_xi __o;
24717 uint8x16x4_t temp;
24718 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24719 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24720 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24721 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
24722 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24723 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24724 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24725 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24726 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24729 __extension__ static __inline void __attribute__ ((__always_inline__))
24730 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
24732 __builtin_aarch64_simd_xi __o;
24733 uint16x8x4_t temp;
24734 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24735 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24736 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24737 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
24738 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24739 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24740 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24741 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24742 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24745 __extension__ static __inline void __attribute__ ((__always_inline__))
24746 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
24748 __builtin_aarch64_simd_xi __o;
24749 uint32x4x4_t temp;
24750 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24751 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24752 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24753 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
24754 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24755 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24756 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24757 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24758 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24761 __extension__ static __inline void __attribute__ ((__always_inline__))
24762 vst4_f32 (float32_t * __a, float32x2x4_t val)
24764 __builtin_aarch64_simd_xi __o;
24765 float32x4x4_t temp;
24766 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24767 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24768 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24769 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
24770 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
24771 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
24772 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
24773 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
24774 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24777 __extension__ static __inline void __attribute__ ((__always_inline__))
24778 vst4q_s8 (int8_t * __a, int8x16x4_t val)
24780 __builtin_aarch64_simd_xi __o;
24781 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24782 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24783 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24784 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24785 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24788 __extension__ static __inline void __attribute__ ((__always_inline__))
24789 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
24791 __builtin_aarch64_simd_xi __o;
24792 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24793 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24794 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24795 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24796 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24799 __extension__ static __inline void __attribute__ ((__always_inline__))
24800 vst4q_s16 (int16_t * __a, int16x8x4_t val)
24802 __builtin_aarch64_simd_xi __o;
24803 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24804 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24805 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24806 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24807 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* vst4q_<type>: store four 128-bit vectors of 4-element interleaved
   structures.  Each variant packs the four Q registers of VAL into an
   XI (4 x 128-bit) opaque compound value and issues a single ST4
   structure store to the address __a.  The casts to the signed builtin
   element types are layout-preserving reinterpretations only.  */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s32 (int32_t * __a, int32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s64 (int64_t * __a, int64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f64 (float64_t * __a, float64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
24909 /* vsub */
24911 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24912 vsubd_s64 (int64x1_t __a, int64x1_t __b)
24914 return __a - __b;
/* Scalar 64-bit unsigned subtract (SUB Dd, Dn, Dm); unsigned
   arithmetic wraps modulo 2^64 by definition.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a - __b;
}
/* vtbx1 */

/* Extended table lookup with one 8-byte table.  Lanes whose index is
   in range [0, 7] are looked up in __tab; lanes whose index is out of
   range keep the corresponding lane of the fallback vector __r.  The
   out-of-range handling is emulated: vtbl1 zeroes such lanes, so a
   "index < 8" mask selects between the lookup result and __r.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (8));
  int8x8_t __tbl = vtbl1_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}

/* As vtbx1_s8, for unsigned elements.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}

/* As vtbx1_s8, for polynomial elements.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}
/* vtbx3 */

/* Extended table lookup with a three-vector (24-byte) table.  Indices
   in [0, 23] select a table byte; out-of-range indices keep the
   corresponding lane of __r.  Same mask-and-select emulation as
   vtbx1, with the bound raised from 8 to 24.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (24));
  int8x8_t __tbl = vtbl3_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}

/* As vtbx3_s8, for unsigned elements.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}

/* As vtbx3_s8, for polynomial elements.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}
/* vtrn: transpose two vectors element-wise.  Each variant returns the
   pair {TRN1, TRN2}: val[0] holds the even-indexed lanes of A and B
   interleaved, val[1] the odd-indexed lanes, matching the ARMv8 TRN1
   and TRN2 instructions.  */

__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vtrn_f32 (float32x2_t a, float32x2_t b)
{
  return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vtrn_p8 (poly8x8_t a, poly8x8_t b)
{
  return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vtrn_p16 (poly16x4_t a, poly16x4_t b)
{
  return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vtrn_s8 (int8x8_t a, int8x8_t b)
{
  return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vtrn_s16 (int16x4_t a, int16x4_t b)
{
  return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vtrn_s32 (int32x2_t a, int32x2_t b)
{
  return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vtrn_u8 (uint8x8_t a, uint8x8_t b)
{
  return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vtrn_u16 (uint16x4_t a, uint16x4_t b)
{
  return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vtrn_u32 (uint32x2_t a, uint32x2_t b)
{
  return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
}

/* 128-bit (Q register) variants of vtrn.  */

__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vtrnq_f32 (float32x4_t a, float32x4_t b)
{
  return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vtrnq_p8 (poly8x16_t a, poly8x16_t b)
{
  return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vtrnq_p16 (poly16x8_t a, poly16x8_t b)
{
  return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
}

__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vtrnq_s8 (int8x16_t a, int8x16_t b)
{
  return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vtrnq_s16 (int16x8_t a, int16x8_t b)
{
  return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vtrnq_s32 (int32x4_t a, int32x4_t b)
{
  return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vtrnq_u8 (uint8x16_t a, uint8x16_t b)
{
  return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vtrnq_u16 (uint16x8_t a, uint16x8_t b)
{
  return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vtrnq_u32 (uint32x4_t a, uint32x4_t b)
{
  return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
}
/* vtst: per-lane bit test (CMTST).  Each result lane is all ones when
   (a & b) is non-zero for that lane, otherwise all zeros.  The single
   64-bit lane forms are computed with scalar C since the lane types
   here are plain scalars.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64 (int64x1_t __a, int64x1_t __b)
{
  /* Scalar emulation: all-ones on any common set bit, else zero.  */
  return (__a & __b) ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

/* 128-bit (Q register) variants of vtst.  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

/* Scalar (D register) bit-test forms.  */

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}
/* vuqadd: signed saturating accumulate of an unsigned value (SUQADD).
   Each lane of the unsigned operand __b is added to the signed
   accumulator lane in __a, saturating the result to the signed range.
   The casts merely reinterpret the unsigned operand's bits for the
   builtin's signed-typed signature.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
}

/* 128-bit (Q register) variants.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
}

/* Scalar (single-lane) variants.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
{
  return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
}
/* Define one interleave intrinsic v<op>[q]_<funcsuffix> that returns
   the pair {v<op>1[q]_<suffix> (a, b), v<op>2[q]_<suffix> (a, b)},
   i.e. the two-register result of the corresponding UZP/ZIP/TRN
   operation.  Q is empty for 64-bit forms and 'q' for 128-bit.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \
  { \
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \
  }
/* Instantiate __DEFINTERLEAVE for every element type, in both the
   64-bit and 128-bit ('q'-suffixed) forms.  */
#define __INTERLEAVE_LIST(op) \
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
/* vuzp: generate all vuzp[q]_<type> unzip intrinsics.  */

__INTERLEAVE_LIST (uzp)

/* vzip: generate all vzip[q]_<type> zip intrinsics.  */

__INTERLEAVE_LIST (zip)

/* The helper macros are internal to this header only.  */
#undef __INTERLEAVE_LIST
#undef __DEFINTERLEAVE
/* End of optimal implementations in approved order.  */

/* Undefine the lane-access and duplicate helper macros defined at the
   top of this header so they do not leak into user code.  */

#undef __aarch64_vget_lane_any
#undef __aarch64_vget_lane_f32
#undef __aarch64_vget_lane_f64
#undef __aarch64_vget_lane_p8
#undef __aarch64_vget_lane_p16
#undef __aarch64_vget_lane_s8
#undef __aarch64_vget_lane_s16
#undef __aarch64_vget_lane_s32
#undef __aarch64_vget_lane_s64
#undef __aarch64_vget_lane_u8
#undef __aarch64_vget_lane_u16
#undef __aarch64_vget_lane_u32
#undef __aarch64_vget_lane_u64

#undef __aarch64_vgetq_lane_f32
#undef __aarch64_vgetq_lane_f64
#undef __aarch64_vgetq_lane_p8
#undef __aarch64_vgetq_lane_p16
#undef __aarch64_vgetq_lane_s8
#undef __aarch64_vgetq_lane_s16
#undef __aarch64_vgetq_lane_s32
#undef __aarch64_vgetq_lane_s64
#undef __aarch64_vgetq_lane_u8
#undef __aarch64_vgetq_lane_u16
#undef __aarch64_vgetq_lane_u32
#undef __aarch64_vgetq_lane_u64

#undef __aarch64_vdup_lane_any
#undef __aarch64_vdup_lane_f32
#undef __aarch64_vdup_lane_f64
#undef __aarch64_vdup_lane_p8
#undef __aarch64_vdup_lane_p16
#undef __aarch64_vdup_lane_s8
#undef __aarch64_vdup_lane_s16
#undef __aarch64_vdup_lane_s32
#undef __aarch64_vdup_lane_s64
#undef __aarch64_vdup_lane_u8
#undef __aarch64_vdup_lane_u16
#undef __aarch64_vdup_lane_u32
#undef __aarch64_vdup_lane_u64
#undef __aarch64_vdup_laneq_f32
#undef __aarch64_vdup_laneq_f64
#undef __aarch64_vdup_laneq_p8
#undef __aarch64_vdup_laneq_p16
#undef __aarch64_vdup_laneq_s8
#undef __aarch64_vdup_laneq_s16
#undef __aarch64_vdup_laneq_s32
#undef __aarch64_vdup_laneq_s64
#undef __aarch64_vdup_laneq_u8
#undef __aarch64_vdup_laneq_u16
#undef __aarch64_vdup_laneq_u32
#undef __aarch64_vdup_laneq_u64
#undef __aarch64_vdupq_lane_f32
#undef __aarch64_vdupq_lane_f64
#undef __aarch64_vdupq_lane_p8
#undef __aarch64_vdupq_lane_p16
#undef __aarch64_vdupq_lane_s8
#undef __aarch64_vdupq_lane_s16
#undef __aarch64_vdupq_lane_s32
#undef __aarch64_vdupq_lane_s64
#undef __aarch64_vdupq_lane_u8
#undef __aarch64_vdupq_lane_u16
#undef __aarch64_vdupq_lane_u32
#undef __aarch64_vdupq_lane_u64
#undef __aarch64_vdupq_laneq_f32
#undef __aarch64_vdupq_laneq_f64
#undef __aarch64_vdupq_laneq_p8
#undef __aarch64_vdupq_laneq_p16
#undef __aarch64_vdupq_laneq_s8
#undef __aarch64_vdupq_laneq_s16
#undef __aarch64_vdupq_laneq_s32
#undef __aarch64_vdupq_laneq_s64
#undef __aarch64_vdupq_laneq_u8
#undef __aarch64_vdupq_laneq_u16
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64

#endif