Missed part from my V1DI'fication. This makes something around 50
[llvm-gcc-4.2.git] / gcc / config / arm / arm_neon.h
blobff92d7f4812cd978fdd54da57be270e5e17dc4ff
/* LLVM LOCAL file Changed to use preprocessor macros.  */
/* APPLE LOCAL file v7 support. Merge from Codesourcery */
/* ARM NEON intrinsics include file. This file is generated automatically
   using neon-gen.ml.  Please do not edit manually.

   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 2, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the
   Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA.  */
/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */
33 #ifndef _GCC_ARM_NEON_H
34 #define _GCC_ARM_NEON_H 1
36 #ifndef __ARM_NEON__
37 #error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h
38 #else
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
44 #include <stdint.h>
46 typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8)));
47 typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8)));
48 typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8)));
49 typedef __builtin_neon_di int64x1_t __attribute__ ((__vector_size__ (8)));
50 typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8)));
51 typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8)));
52 typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8)));
53 typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8)));
54 typedef __builtin_neon_uhi uint16x4_t __attribute__ ((__vector_size__ (8)));
55 typedef __builtin_neon_usi uint32x2_t __attribute__ ((__vector_size__ (8)));
56 typedef __builtin_neon_udi uint64x1_t __attribute__ ((__vector_size__ (8)));
57 typedef __builtin_neon_qi int8x16_t __attribute__ ((__vector_size__ (16)));
58 typedef __builtin_neon_hi int16x8_t __attribute__ ((__vector_size__ (16)));
59 typedef __builtin_neon_si int32x4_t __attribute__ ((__vector_size__ (16)));
60 typedef __builtin_neon_di int64x2_t __attribute__ ((__vector_size__ (16)));
61 typedef __builtin_neon_sf float32x4_t __attribute__ ((__vector_size__ (16)));
62 typedef __builtin_neon_poly8 poly8x16_t __attribute__ ((__vector_size__ (16)));
63 typedef __builtin_neon_poly16 poly16x8_t __attribute__ ((__vector_size__ (16)));
64 typedef __builtin_neon_uqi uint8x16_t __attribute__ ((__vector_size__ (16)));
65 typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16)));
66 typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_neon_sf float32_t;
70 typedef __builtin_neon_poly8 poly8_t;
71 typedef __builtin_neon_poly16 poly16_t;
73 typedef struct int8x8x2_t
75 int8x8_t val[2];
76 } int8x8x2_t;
78 typedef struct int8x16x2_t
80 int8x16_t val[2];
81 } int8x16x2_t;
83 typedef struct int16x4x2_t
85 int16x4_t val[2];
86 } int16x4x2_t;
88 typedef struct int16x8x2_t
90 int16x8_t val[2];
91 } int16x8x2_t;
93 typedef struct int32x2x2_t
95 int32x2_t val[2];
96 } int32x2x2_t;
98 typedef struct int32x4x2_t
100 int32x4_t val[2];
101 } int32x4x2_t;
103 typedef struct int64x1x2_t
105 int64x1_t val[2];
106 } int64x1x2_t;
108 typedef struct int64x2x2_t
110 int64x2_t val[2];
111 } int64x2x2_t;
113 typedef struct uint8x8x2_t
115 uint8x8_t val[2];
116 } uint8x8x2_t;
118 typedef struct uint8x16x2_t
120 uint8x16_t val[2];
121 } uint8x16x2_t;
123 typedef struct uint16x4x2_t
125 uint16x4_t val[2];
126 } uint16x4x2_t;
128 typedef struct uint16x8x2_t
130 uint16x8_t val[2];
131 } uint16x8x2_t;
133 typedef struct uint32x2x2_t
135 uint32x2_t val[2];
136 } uint32x2x2_t;
138 typedef struct uint32x4x2_t
140 uint32x4_t val[2];
141 } uint32x4x2_t;
143 typedef struct uint64x1x2_t
145 uint64x1_t val[2];
146 } uint64x1x2_t;
148 typedef struct uint64x2x2_t
150 uint64x2_t val[2];
151 } uint64x2x2_t;
153 typedef struct float32x2x2_t
155 float32x2_t val[2];
156 } float32x2x2_t;
158 typedef struct float32x4x2_t
160 float32x4_t val[2];
161 } float32x4x2_t;
163 typedef struct poly8x8x2_t
165 poly8x8_t val[2];
166 } poly8x8x2_t;
168 typedef struct poly8x16x2_t
170 poly8x16_t val[2];
171 } poly8x16x2_t;
173 typedef struct poly16x4x2_t
175 poly16x4_t val[2];
176 } poly16x4x2_t;
178 typedef struct poly16x8x2_t
180 poly16x8_t val[2];
181 } poly16x8x2_t;
183 typedef struct int8x8x3_t
185 int8x8_t val[3];
186 } int8x8x3_t;
188 typedef struct int8x16x3_t
190 int8x16_t val[3];
191 } int8x16x3_t;
193 typedef struct int16x4x3_t
195 int16x4_t val[3];
196 } int16x4x3_t;
198 typedef struct int16x8x3_t
200 int16x8_t val[3];
201 } int16x8x3_t;
203 typedef struct int32x2x3_t
205 int32x2_t val[3];
206 } int32x2x3_t;
208 typedef struct int32x4x3_t
210 int32x4_t val[3];
211 } int32x4x3_t;
213 typedef struct int64x1x3_t
215 int64x1_t val[3];
216 } int64x1x3_t;
218 typedef struct int64x2x3_t
220 int64x2_t val[3];
221 } int64x2x3_t;
223 typedef struct uint8x8x3_t
225 uint8x8_t val[3];
226 } uint8x8x3_t;
228 typedef struct uint8x16x3_t
230 uint8x16_t val[3];
231 } uint8x16x3_t;
233 typedef struct uint16x4x3_t
235 uint16x4_t val[3];
236 } uint16x4x3_t;
238 typedef struct uint16x8x3_t
240 uint16x8_t val[3];
241 } uint16x8x3_t;
243 typedef struct uint32x2x3_t
245 uint32x2_t val[3];
246 } uint32x2x3_t;
248 typedef struct uint32x4x3_t
250 uint32x4_t val[3];
251 } uint32x4x3_t;
253 typedef struct uint64x1x3_t
255 uint64x1_t val[3];
256 } uint64x1x3_t;
258 typedef struct uint64x2x3_t
260 uint64x2_t val[3];
261 } uint64x2x3_t;
263 typedef struct float32x2x3_t
265 float32x2_t val[3];
266 } float32x2x3_t;
268 typedef struct float32x4x3_t
270 float32x4_t val[3];
271 } float32x4x3_t;
273 typedef struct poly8x8x3_t
275 poly8x8_t val[3];
276 } poly8x8x3_t;
278 typedef struct poly8x16x3_t
280 poly8x16_t val[3];
281 } poly8x16x3_t;
283 typedef struct poly16x4x3_t
285 poly16x4_t val[3];
286 } poly16x4x3_t;
288 typedef struct poly16x8x3_t
290 poly16x8_t val[3];
291 } poly16x8x3_t;
293 typedef struct int8x8x4_t
295 int8x8_t val[4];
296 } int8x8x4_t;
298 typedef struct int8x16x4_t
300 int8x16_t val[4];
301 } int8x16x4_t;
303 typedef struct int16x4x4_t
305 int16x4_t val[4];
306 } int16x4x4_t;
308 typedef struct int16x8x4_t
310 int16x8_t val[4];
311 } int16x8x4_t;
313 typedef struct int32x2x4_t
315 int32x2_t val[4];
316 } int32x2x4_t;
318 typedef struct int32x4x4_t
320 int32x4_t val[4];
321 } int32x4x4_t;
323 typedef struct int64x1x4_t
325 int64x1_t val[4];
326 } int64x1x4_t;
328 typedef struct int64x2x4_t
330 int64x2_t val[4];
331 } int64x2x4_t;
333 typedef struct uint8x8x4_t
335 uint8x8_t val[4];
336 } uint8x8x4_t;
338 typedef struct uint8x16x4_t
340 uint8x16_t val[4];
341 } uint8x16x4_t;
343 typedef struct uint16x4x4_t
345 uint16x4_t val[4];
346 } uint16x4x4_t;
348 typedef struct uint16x8x4_t
350 uint16x8_t val[4];
351 } uint16x8x4_t;
353 typedef struct uint32x2x4_t
355 uint32x2_t val[4];
356 } uint32x2x4_t;
358 typedef struct uint32x4x4_t
360 uint32x4_t val[4];
361 } uint32x4x4_t;
363 typedef struct uint64x1x4_t
365 uint64x1_t val[4];
366 } uint64x1x4_t;
368 typedef struct uint64x2x4_t
370 uint64x2_t val[4];
371 } uint64x2x4_t;
373 typedef struct float32x2x4_t
375 float32x2_t val[4];
376 } float32x2x4_t;
378 typedef struct float32x4x4_t
380 float32x4_t val[4];
381 } float32x4x4_t;
383 typedef struct poly8x8x4_t
385 poly8x8_t val[4];
386 } poly8x8x4_t;
388 typedef struct poly8x16x4_t
390 poly8x16_t val[4];
391 } poly8x16x4_t;
393 typedef struct poly16x4x4_t
395 poly16x4_t val[4];
396 } poly16x4x4_t;
398 typedef struct poly16x8x4_t
400 poly16x8_t val[4];
401 } poly16x8x4_t;
/* vadd_<t> / vaddq_<t>: forward both operands to __builtin_neon_vadd<mode>
   and cast the result to the vector type named by the suffix.  The last
   builtin argument is a constant that follows the suffix: 1 for _s*, 0 for
   _u*, 5 for _f32 (presumably a signedness/type selector read by the
   builtin -- confirm against the compiler's NEON builtin expander).
   Each expansion is wrapped in parentheses so the cast always applies to
   the builtin's result, even next to a postfix operator or sizeof.  */
#define vadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1))
#define vadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1))
#define vadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1))
#define vadd_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vaddv1di (__a, __b, 1))
#define vadd_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 5))
#define vadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vaddv8qi (__a, __b, 0))
#define vadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vaddv4hi (__a, __b, 0))
#define vadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vaddv2si (__a, __b, 0))
#define vadd_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vaddv1di (__a, __b, 0))

#define vaddq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1))
#define vaddq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1))
#define vaddq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1))
#define vaddq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1))
#define vaddq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 5))
#define vaddq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vaddv16qi (__a, __b, 0))
#define vaddq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vaddv8hi (__a, __b, 0))
#define vaddq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vaddv4si (__a, __b, 0))
#define vaddq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vaddv2di (__a, __b, 0))
/* vaddl_<t> and vaddw_<t>: forward to __builtin_neon_vaddl<mode> and
   __builtin_neon_vaddw<mode> respectively; the result is cast to a 16-byte
   vector type.  The trailing constant is 1 for _s* and 0 for _u*
   (presumably a signedness selector -- confirm against the builtin
   expander).  Expansions are fully parenthesized so the cast cannot be
   captured by a neighbouring postfix operator or sizeof.  */
#define vaddl_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vaddlv8qi (__a, __b, 1))
#define vaddl_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vaddlv4hi (__a, __b, 1))
#define vaddl_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vaddlv2si (__a, __b, 1))
#define vaddl_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vaddlv8qi (__a, __b, 0))
#define vaddl_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vaddlv4hi (__a, __b, 0))
#define vaddl_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vaddlv2si (__a, __b, 0))

#define vaddw_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vaddwv8qi (__a, __b, 1))
#define vaddw_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vaddwv4hi (__a, __b, 1))
#define vaddw_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vaddwv2si (__a, __b, 1))
#define vaddw_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vaddwv8qi (__a, __b, 0))
#define vaddw_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vaddwv4hi (__a, __b, 0))
#define vaddw_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vaddwv2si (__a, __b, 0))
/* vhadd[q]_<t> and vrhadd[q]_<t>: both families forward to
   __builtin_neon_vhadd<mode>; they differ only in the trailing constant.
   vhadd passes 1 for _s* and 0 for _u*; vrhadd passes 3 for _s* and 2 for
   _u* (i.e. the same value plus 2 -- presumably a "rounding" flag, confirm
   against the builtin expander).  Expansions are fully parenthesized so the
   cast cannot be captured by a neighbouring postfix operator or sizeof.  */
#define vhadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 1))
#define vhadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 1))
#define vhadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 1))
#define vhadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 0))
#define vhadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 0))
#define vhadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vhaddv2si (__a, __b, 0))

#define vhaddq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 1))
#define vhaddq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 1))
#define vhaddq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 1))
#define vhaddq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 0))
#define vhaddq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 0))
#define vhaddq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vhaddv4si (__a, __b, 0))

#define vrhadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 3))
#define vrhadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 3))
#define vrhadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 3))
#define vrhadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 2))
#define vrhadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 2))
#define vrhadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vhaddv2si (__a, __b, 2))

#define vrhaddq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 3))
#define vrhaddq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 3))
#define vrhaddq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 3))
#define vrhaddq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 2))
#define vrhaddq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 2))
#define vrhaddq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vhaddv4si (__a, __b, 2))
/* vqadd_<t> / vqaddq_<t>: forward to __builtin_neon_vqadd<mode> and cast the
   result to the vector type named by the suffix.  The trailing constant is
   1 for _s* and 0 for _u* (presumably a signedness selector -- confirm
   against the builtin expander).  Expansions are fully parenthesized so the
   cast cannot be captured by a neighbouring postfix operator or sizeof.  */
#define vqadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqaddv8qi (__a, __b, 1))
#define vqadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqaddv4hi (__a, __b, 1))
#define vqadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqaddv2si (__a, __b, 1))
#define vqadd_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqaddv1di (__a, __b, 1))
#define vqadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqaddv8qi (__a, __b, 0))
#define vqadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqaddv4hi (__a, __b, 0))
#define vqadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqaddv2si (__a, __b, 0))
#define vqadd_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqaddv1di (__a, __b, 0))

#define vqaddq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqaddv16qi (__a, __b, 1))
#define vqaddq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqaddv8hi (__a, __b, 1))
#define vqaddq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqaddv4si (__a, __b, 1))
#define vqaddq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqaddv2di (__a, __b, 1))
#define vqaddq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqaddv16qi (__a, __b, 0))
#define vqaddq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqaddv8hi (__a, __b, 0))
#define vqaddq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqaddv4si (__a, __b, 0))
#define vqaddq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqaddv2di (__a, __b, 0))
/* vaddhn_<t> and vraddhn_<t>: both forward to __builtin_neon_vaddhn<mode>
   and cast the result to an 8-byte vector type.  vaddhn passes 1 for _s*
   and 0 for _u*; vraddhn passes 3 for _s* and 2 for _u* (the same value
   plus 2 -- presumably a "rounding" flag, confirm against the builtin
   expander).  Expansions are fully parenthesized so the cast cannot be
   captured by a neighbouring postfix operator or sizeof.  */
#define vaddhn_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 1))
#define vaddhn_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 1))
#define vaddhn_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 1))
#define vaddhn_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 0))
#define vaddhn_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 0))
#define vaddhn_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 0))

#define vraddhn_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 3))
#define vraddhn_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 3))
#define vraddhn_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 3))
#define vraddhn_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 2))
#define vraddhn_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 2))
#define vraddhn_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 2))
/* vmul_<t> / vmulq_<t>: forward to __builtin_neon_vmul<mode> and cast the
   result to the vector type named by the suffix.  The trailing constant
   follows the suffix: 1 for _s*, 0 for _u*, 5 for _f32, 4 for _p8
   (presumably a type selector read by the builtin -- confirm against the
   builtin expander).  Expansions are fully parenthesized so the cast cannot
   be captured by a neighbouring postfix operator or sizeof.  */
#define vmul_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1))
#define vmul_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1))
#define vmul_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1))
#define vmul_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 5))
#define vmul_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vmulv8qi (__a, __b, 0))
#define vmul_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vmulv4hi (__a, __b, 0))
#define vmul_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vmulv2si (__a, __b, 0))
#define vmul_p8(__a, __b) \
  ((poly8x8_t)__builtin_neon_vmulv8qi (__a, __b, 4))

#define vmulq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1))
#define vmulq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1))
#define vmulq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1))
#define vmulq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 5))
#define vmulq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vmulv16qi (__a, __b, 0))
#define vmulq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vmulv8hi (__a, __b, 0))
#define vmulq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmulv4si (__a, __b, 0))
#define vmulq_p8(__a, __b) \
  ((poly8x16_t)__builtin_neon_vmulv16qi (__a, __b, 4))
/* vqdmulh[q]_<t> and vqrdmulh[q]_<t>: both forward to
   __builtin_neon_vqdmulh<mode>; only signed variants exist here.  vqdmulh
   passes 1 as the trailing constant, vqrdmulh passes 3 (presumably 1 plus a
   "rounding" flag of 2 -- confirm against the builtin expander).
   Expansions are fully parenthesized so the cast cannot be captured by a
   neighbouring postfix operator or sizeof.  */
#define vqdmulh_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 1))
#define vqdmulh_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 1))
#define vqdmulhq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 1))
#define vqdmulhq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 1))

#define vqrdmulh_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 3))
#define vqrdmulh_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 3))
#define vqrdmulhq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 3))
#define vqrdmulhq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 3))
/* vmull_<t> and vqdmull_<t>: forward to __builtin_neon_vmull<mode> and
   __builtin_neon_vqdmull<mode>; the result is cast to a 16-byte vector
   type.  The trailing constant is 1 for _s*, 0 for _u*, 4 for _p8
   (presumably a type selector -- confirm against the builtin expander).
   Expansions are fully parenthesized so the cast cannot be captured by a
   neighbouring postfix operator or sizeof.  */
#define vmull_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vmullv8qi (__a, __b, 1))
#define vmull_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vmullv4hi (__a, __b, 1))
#define vmull_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vmullv2si (__a, __b, 1))
#define vmull_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vmullv8qi (__a, __b, 0))
#define vmull_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmullv4hi (__a, __b, 0))
#define vmull_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vmullv2si (__a, __b, 0))
#define vmull_p8(__a, __b) \
  ((poly16x8_t)__builtin_neon_vmullv8qi (__a, __b, 4))

#define vqdmull_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmullv4hi (__a, __b, 1))
#define vqdmull_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vqdmullv2si (__a, __b, 1))
/* vmla_<t> / vmlaq_<t>: forward all three operands, in order, to
   __builtin_neon_vmla<mode> and cast the result to the vector type named by
   the suffix.  The trailing constant is 1 for _s*, 0 for _u*, 5 for _f32
   (presumably a type selector -- confirm against the builtin expander).
   Expansions are fully parenthesized so the cast cannot be captured by a
   neighbouring postfix operator or sizeof.  */
#define vmla_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c, 1))
#define vmla_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c, 1))
#define vmla_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmlav2si (__a, __b, __c, 1))
#define vmla_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmlav2sf (__a, __b, __c, 5))
#define vmla_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c, 0))
#define vmla_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c, 0))
#define vmla_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmlav2si (__a, __b, __c, 0))

#define vmlaq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c, 1))
#define vmlaq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c, 1))
#define vmlaq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlav4si (__a, __b, __c, 1))
#define vmlaq_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmlav4sf (__a, __b, __c, 5))
#define vmlaq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c, 0))
#define vmlaq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c, 0))
#define vmlaq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlav4si (__a, __b, __c, 0))
/* vmlal_<t> and vqdmlal_<t>: forward all three operands, in order, to
   __builtin_neon_vmlal<mode> and __builtin_neon_vqdmlal<mode>; the result
   is cast to a 16-byte vector type.  The trailing constant is 1 for _s*
   and 0 for _u* (presumably a signedness selector -- confirm against the
   builtin expander).  Expansions are fully parenthesized so the cast cannot
   be captured by a neighbouring postfix operator or sizeof.  */
#define vmlal_s8(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmlalv8qi (__a, __b, __c, 1))
#define vmlal_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlalv4hi (__a, __b, __c, 1))
#define vmlal_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmlalv2si (__a, __b, __c, 1))
#define vmlal_u8(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmlalv8qi (__a, __b, __c, 0))
#define vmlal_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlalv4hi (__a, __b, __c, 0))
#define vmlal_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmlalv2si (__a, __b, __c, 0))

#define vqdmlal_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmlalv4hi (__a, __b, __c, 1))
#define vqdmlal_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmlalv2si (__a, __b, __c, 1))
/* vmls_<t> / vmlsq_<t>: forward all three operands, in order, to
   __builtin_neon_vmls<mode> and cast the result to the vector type named by
   the suffix.  The trailing constant is 1 for _s*, 0 for _u*, 5 for _f32
   (presumably a type selector -- confirm against the builtin expander).
   Expansions are fully parenthesized so the cast cannot be captured by a
   neighbouring postfix operator or sizeof.  */
#define vmls_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c, 1))
#define vmls_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c, 1))
#define vmls_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c, 1))
#define vmls_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmlsv2sf (__a, __b, __c, 5))
#define vmls_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c, 0))
#define vmls_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c, 0))
#define vmls_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c, 0))

#define vmlsq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c, 1))
#define vmlsq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c, 1))
#define vmlsq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c, 1))
#define vmlsq_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmlsv4sf (__a, __b, __c, 5))
#define vmlsq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c, 0))
#define vmlsq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c, 0))
#define vmlsq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c, 0))
/* vmlsl_<t> and vqdmlsl_<t>: forward all three operands, in order, to
   __builtin_neon_vmlsl<mode> and __builtin_neon_vqdmlsl<mode>; the result
   is cast to a 16-byte vector type.  The trailing constant is 1 for _s*
   and 0 for _u* (presumably a signedness selector -- confirm against the
   builtin expander).  Expansions are fully parenthesized so the cast cannot
   be captured by a neighbouring postfix operator or sizeof.  */
#define vmlsl_s8(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmlslv8qi (__a, __b, __c, 1))
#define vmlsl_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlslv4hi (__a, __b, __c, 1))
#define vmlsl_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmlslv2si (__a, __b, __c, 1))
#define vmlsl_u8(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmlslv8qi (__a, __b, __c, 0))
#define vmlsl_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlslv4hi (__a, __b, __c, 0))
#define vmlsl_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmlslv2si (__a, __b, __c, 0))

#define vqdmlsl_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmlslv4hi (__a, __b, __c, 1))
#define vqdmlsl_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1))
/* vsub_<t> / vsubq_<t>: forward both operands to __builtin_neon_vsub<mode>
   and cast the result to the vector type named by the suffix.  The trailing
   constant is 1 for _s*, 0 for _u*, 5 for _f32 (presumably a type selector
   -- confirm against the builtin expander).  Expansions are fully
   parenthesized so the cast cannot be captured by a neighbouring postfix
   operator or sizeof.  */
#define vsub_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1))
#define vsub_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1))
#define vsub_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1))
#define vsub_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vsubv1di (__a, __b, 1))
#define vsub_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 5))
#define vsub_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vsubv8qi (__a, __b, 0))
#define vsub_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vsubv4hi (__a, __b, 0))
#define vsub_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vsubv2si (__a, __b, 0))
#define vsub_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vsubv1di (__a, __b, 0))

#define vsubq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1))
#define vsubq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1))
#define vsubq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1))
#define vsubq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1))
#define vsubq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 5))
#define vsubq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vsubv16qi (__a, __b, 0))
#define vsubq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vsubv8hi (__a, __b, 0))
#define vsubq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vsubv4si (__a, __b, 0))
#define vsubq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vsubv2di (__a, __b, 0))
/* vsubl_<t> and vsubw_<t>: forward to __builtin_neon_vsubl<mode> and
   __builtin_neon_vsubw<mode> respectively; the result is cast to a 16-byte
   vector type.  The trailing constant is 1 for _s* and 0 for _u*
   (presumably a signedness selector -- confirm against the builtin
   expander).  Expansions are fully parenthesized so the cast cannot be
   captured by a neighbouring postfix operator or sizeof.  */
#define vsubl_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vsublv8qi (__a, __b, 1))
#define vsubl_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vsublv4hi (__a, __b, 1))
#define vsubl_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vsublv2si (__a, __b, 1))
#define vsubl_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vsublv8qi (__a, __b, 0))
#define vsubl_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vsublv4hi (__a, __b, 0))
#define vsubl_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vsublv2si (__a, __b, 0))

#define vsubw_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vsubwv8qi (__a, __b, 1))
#define vsubw_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vsubwv4hi (__a, __b, 1))
#define vsubw_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vsubwv2si (__a, __b, 1))
#define vsubw_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vsubwv8qi (__a, __b, 0))
#define vsubw_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vsubwv4hi (__a, __b, 0))
#define vsubw_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vsubwv2si (__a, __b, 0))
/* vhsub_<t> / vhsubq_<t>: forward both operands to
   __builtin_neon_vhsub<mode> and cast the result to the vector type named
   by the suffix.  The trailing constant is 1 for _s* and 0 for _u*
   (presumably a signedness selector -- confirm against the builtin
   expander).  Expansions are fully parenthesized so the cast cannot be
   captured by a neighbouring postfix operator or sizeof.  */
#define vhsub_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vhsubv8qi (__a, __b, 1))
#define vhsub_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vhsubv4hi (__a, __b, 1))
#define vhsub_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vhsubv2si (__a, __b, 1))
#define vhsub_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vhsubv8qi (__a, __b, 0))
#define vhsub_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vhsubv4hi (__a, __b, 0))
#define vhsub_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vhsubv2si (__a, __b, 0))

#define vhsubq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vhsubv16qi (__a, __b, 1))
#define vhsubq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vhsubv8hi (__a, __b, 1))
#define vhsubq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vhsubv4si (__a, __b, 1))
#define vhsubq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vhsubv16qi (__a, __b, 0))
#define vhsubq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vhsubv8hi (__a, __b, 0))
#define vhsubq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vhsubv4si (__a, __b, 0))
/* vqsub_<t> / vqsubq_<t>: forward both operands to
   __builtin_neon_vqsub<mode> and cast the result to the vector type named
   by the suffix.  The trailing constant is 1 for _s* and 0 for _u*
   (presumably a signedness selector -- confirm against the builtin
   expander).  Expansions are fully parenthesized so the cast cannot be
   captured by a neighbouring postfix operator or sizeof.  */
#define vqsub_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqsubv8qi (__a, __b, 1))
#define vqsub_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqsubv4hi (__a, __b, 1))
#define vqsub_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqsubv2si (__a, __b, 1))
#define vqsub_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqsubv1di (__a, __b, 1))
#define vqsub_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqsubv8qi (__a, __b, 0))
#define vqsub_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqsubv4hi (__a, __b, 0))
#define vqsub_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqsubv2si (__a, __b, 0))
#define vqsub_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqsubv1di (__a, __b, 0))

#define vqsubq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqsubv16qi (__a, __b, 1))
#define vqsubq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqsubv8hi (__a, __b, 1))
#define vqsubq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqsubv4si (__a, __b, 1))
#define vqsubq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqsubv2di (__a, __b, 1))
#define vqsubq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqsubv16qi (__a, __b, 0))
#define vqsubq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqsubv8hi (__a, __b, 0))
#define vqsubq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqsubv4si (__a, __b, 0))
#define vqsubq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqsubv2di (__a, __b, 0))
/* vsubhn_<t> and vrsubhn_<t>: both forward to __builtin_neon_vsubhn<mode>
   and cast the result to an 8-byte vector type.  vsubhn passes 1 for _s*
   and 0 for _u*; vrsubhn passes 3 for _s* and 2 for _u* (the same value
   plus 2 -- presumably a "rounding" flag, confirm against the builtin
   expander).  Expansions are fully parenthesized so the cast cannot be
   captured by a neighbouring postfix operator or sizeof.  */
#define vsubhn_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 1))
#define vsubhn_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 1))
#define vsubhn_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 1))
#define vsubhn_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 0))
#define vsubhn_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 0))
#define vsubhn_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 0))

#define vrsubhn_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 3))
#define vrsubhn_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 3))
#define vrsubhn_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 3))
#define vrsubhn_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 2))
#define vrsubhn_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 2))
#define vrsubhn_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 2))
/* vceq_<t> / vceqq_<t>: forward both operands to __builtin_neon_vceq<mode>.
   Whatever the operand suffix, the result is cast to an unsigned vector
   type (uint8/uint16/uint32 variants).  The trailing constant follows the
   suffix: 1 for _s*, 0 for _u*, 5 for _f32, 4 for _p8 (presumably a type
   selector -- confirm against the builtin expander).  Expansions are fully
   parenthesized so the cast cannot be captured by a neighbouring postfix
   operator or sizeof.  */
#define vceq_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 1))
#define vceq_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vceqv4hi (__a, __b, 1))
#define vceq_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vceqv2si (__a, __b, 1))
#define vceq_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vceqv2sf (__a, __b, 5))
#define vceq_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 0))
#define vceq_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vceqv4hi (__a, __b, 0))
#define vceq_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vceqv2si (__a, __b, 0))
#define vceq_p8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 4))

#define vceqq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 1))
#define vceqq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vceqv8hi (__a, __b, 1))
#define vceqq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vceqv4si (__a, __b, 1))
#define vceqq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vceqv4sf (__a, __b, 5))
#define vceqq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 0))
#define vceqq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vceqv8hi (__a, __b, 0))
#define vceqq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vceqv4si (__a, __b, 0))
#define vceqq_p8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 4))
/* vcge_<t> / vcgeq_<t>: forward both operands, in order, to
   __builtin_neon_vcge<mode>.  The result is cast to an unsigned vector type
   regardless of the operand suffix.  The trailing constant is 1 for _s*,
   0 for _u*, 5 for _f32 (presumably a type selector -- confirm against the
   builtin expander).  Expansions are fully parenthesized so the cast cannot
   be captured by a neighbouring postfix operator or sizeof.  */
#define vcge_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgev8qi (__a, __b, 1))
#define vcge_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgev4hi (__a, __b, 1))
#define vcge_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2si (__a, __b, 1))
#define vcge_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2sf (__a, __b, 5))
#define vcge_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgev8qi (__a, __b, 0))
#define vcge_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgev4hi (__a, __b, 0))
#define vcge_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2si (__a, __b, 0))

#define vcgeq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgev16qi (__a, __b, 1))
#define vcgeq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgev8hi (__a, __b, 1))
#define vcgeq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4si (__a, __b, 1))
#define vcgeq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4sf (__a, __b, 5))
#define vcgeq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgev16qi (__a, __b, 0))
#define vcgeq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgev8hi (__a, __b, 0))
#define vcgeq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4si (__a, __b, 0))
/* vcle_<type>: less-than-or-equal compare wrappers for the D vectors.
   There is no separate builtin: a <= b is obtained by calling the
   __builtin_neon_vcge builtin with the operands swapped (__b, __a).
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vcle_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgev8qi (__b, __a, 1))
#define vcle_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgev4hi (__b, __a, 1))
#define vcle_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2si (__b, __a, 1))
#define vcle_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2sf (__b, __a, 5))
#define vcle_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgev8qi (__b, __a, 0))
#define vcle_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgev4hi (__b, __a, 0))
#define vcle_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2si (__b, __a, 0))
/* vcleq_<type>: less-than-or-equal compare wrappers for the Q vectors.
   Implemented as the __builtin_neon_vcge builtin with the operands
   swapped (__b, __a), so a <= b is evaluated as b >= a.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vcleq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgev16qi (__b, __a, 1))
#define vcleq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgev8hi (__b, __a, 1))
#define vcleq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4si (__b, __a, 1))
#define vcleq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4sf (__b, __a, 5))
#define vcleq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgev16qi (__b, __a, 0))
#define vcleq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgev8hi (__b, __a, 0))
#define vcleq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4si (__b, __a, 0))
/* vcgt_<type>: greater-than compare wrappers for the D (x8/x4/x2 lane)
   vectors.  Operands are forwarded in order to the __builtin_neon_vcgt
   builtins and the result is cast to an unsigned vector.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vcgt_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b, 1))
#define vcgt_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b, 1))
#define vcgt_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2si (__a, __b, 1))
#define vcgt_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2sf (__a, __b, 5))
#define vcgt_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b, 0))
#define vcgt_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b, 0))
#define vcgt_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2si (__a, __b, 0))
/* vcgtq_<type>: greater-than compare wrappers for the Q (x16/x8/x4 lane)
   vectors, built on the __builtin_neon_vcgt builtins.  Results are cast
   to unsigned vectors with the same lane layout.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vcgtq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b, 1))
#define vcgtq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b, 1))
#define vcgtq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4si (__a, __b, 1))
#define vcgtq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4sf (__a, __b, 5))
#define vcgtq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b, 0))
#define vcgtq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b, 0))
#define vcgtq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4si (__a, __b, 0))
/* vclt_<type>: less-than compare wrappers for the D vectors.
   There is no separate builtin: a < b is obtained by calling the
   __builtin_neon_vcgt builtin with the operands swapped (__b, __a).
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vclt_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a, 1))
#define vclt_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a, 1))
#define vclt_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2si (__b, __a, 1))
#define vclt_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2sf (__b, __a, 5))
#define vclt_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a, 0))
#define vclt_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a, 0))
#define vclt_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2si (__b, __a, 0))
/* vcltq_<type>: less-than compare wrappers for the Q vectors.
   Implemented as the __builtin_neon_vcgt builtin with the operands
   swapped (__b, __a), so a < b is evaluated as b > a.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vcltq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a, 1))
#define vcltq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a, 1))
#define vcltq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4si (__b, __a, 1))
#define vcltq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4sf (__b, __a, 5))
#define vcltq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a, 0))
#define vcltq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a, 0))
#define vcltq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4si (__b, __a, 0))
/* vcage/vcale/vcagt/vcalt (and their q forms): float-only compare
   wrappers over the __builtin_neon_vcage and __builtin_neon_vcagt builtins
   (presumably the NEON absolute-value compares; see the ARM reference).
   The "le" and "lt" forms reuse the "ge" and "gt" builtins with the
   operands swapped.  All pass 5 as the last argument and cast the result
   to an unsigned 32-bit-lane vector.
   Expansions are parenthesized so each use is one primary expression.  */
#define vcage_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcagev2sf (__a, __b, 5))
#define vcageq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcagev4sf (__a, __b, 5))
#define vcale_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcagev2sf (__b, __a, 5))
#define vcaleq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcagev4sf (__b, __a, 5))
#define vcagt_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcagtv2sf (__a, __b, 5))
#define vcagtq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcagtv4sf (__a, __b, 5))
#define vcalt_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcagtv2sf (__b, __a, 5))
#define vcaltq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcagtv4sf (__b, __a, 5))
/* vtst_<type>: wrappers over the __builtin_neon_vtst builtins for the D
   vectors (presumably the NEON bit-test operation; see the ARM reference).
   Operands are forwarded in order; the result is cast to an unsigned
   vector.  Last argument as used here: 1 for _s*, 0 for _u*, 4 for _p8.
   Expansions are parenthesized so each use is one primary expression.  */
#define vtst_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 1))
#define vtst_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vtstv4hi (__a, __b, 1))
#define vtst_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vtstv2si (__a, __b, 1))
#define vtst_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 0))
#define vtst_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vtstv4hi (__a, __b, 0))
#define vtst_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vtstv2si (__a, __b, 0))
#define vtst_p8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 4))
/* vtstq_<type>: wrappers over the __builtin_neon_vtst builtins for the Q
   vectors.  Operands are forwarded in order; the result is cast to an
   unsigned vector with the same lane layout.
   Last argument as used here: 1 for _s*, 0 for _u*, 4 for _p8.
   Expansions are parenthesized so each use is one primary expression.  */
#define vtstq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 1))
#define vtstq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vtstv8hi (__a, __b, 1))
#define vtstq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vtstv4si (__a, __b, 1))
#define vtstq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 0))
#define vtstq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vtstv8hi (__a, __b, 0))
#define vtstq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vtstv4si (__a, __b, 0))
#define vtstq_p8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 4))
/* vabd_<type>: wrappers over the __builtin_neon_vabd builtins for the D
   vectors (presumably NEON absolute difference; see the ARM reference).
   The result is cast to the vector type named by the macro suffix.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vabd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vabdv8qi (__a, __b, 1))
#define vabd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vabdv4hi (__a, __b, 1))
#define vabd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vabdv2si (__a, __b, 1))
#define vabd_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vabdv2sf (__a, __b, 5))
#define vabd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vabdv8qi (__a, __b, 0))
#define vabd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vabdv4hi (__a, __b, 0))
#define vabd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vabdv2si (__a, __b, 0))
/* vabdq_<type>: wrappers over the __builtin_neon_vabd builtins for the Q
   vectors.  The result is cast to the vector type named by the suffix.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vabdq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vabdv16qi (__a, __b, 1))
#define vabdq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vabdv8hi (__a, __b, 1))
#define vabdq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vabdv4si (__a, __b, 1))
#define vabdq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vabdv4sf (__a, __b, 5))
#define vabdq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vabdv16qi (__a, __b, 0))
#define vabdq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vabdv8hi (__a, __b, 0))
#define vabdq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vabdv4si (__a, __b, 0))
/* vabdl_<type>: wrappers over the __builtin_neon_vabdl builtins.  The
   builtin mode names the D-sized operands (v8qi/v4hi/v2si) while the
   result is cast to a vector with lanes twice as wide (x8 16-bit, x4
   32-bit, x2 64-bit).  Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vabdl_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vabdlv8qi (__a, __b, 1))
#define vabdl_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vabdlv4hi (__a, __b, 1))
#define vabdl_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vabdlv2si (__a, __b, 1))
#define vabdl_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vabdlv8qi (__a, __b, 0))
#define vabdl_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vabdlv4hi (__a, __b, 0))
#define vabdl_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vabdlv2si (__a, __b, 0))
/* vaba_<type>: three-operand wrappers over the __builtin_neon_vaba
   builtins for the D vectors (presumably absolute difference and
   accumulate into __a; see the ARM reference).  Operands are forwarded
   in order.  Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vaba_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vabav8qi (__a, __b, __c, 1))
#define vaba_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vabav4hi (__a, __b, __c, 1))
#define vaba_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vabav2si (__a, __b, __c, 1))
#define vaba_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vabav8qi (__a, __b, __c, 0))
#define vaba_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vabav4hi (__a, __b, __c, 0))
#define vaba_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vabav2si (__a, __b, __c, 0))
/* vabaq_<type>: three-operand wrappers over the __builtin_neon_vaba
   builtins for the Q vectors.  Operands are forwarded in order and the
   result is cast to the vector type named by the suffix.
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vabaq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vabav16qi (__a, __b, __c, 1))
#define vabaq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vabav8hi (__a, __b, __c, 1))
#define vabaq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vabav4si (__a, __b, __c, 1))
#define vabaq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vabav16qi (__a, __b, __c, 0))
#define vabaq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vabav8hi (__a, __b, __c, 0))
#define vabaq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vabav4si (__a, __b, __c, 0))
/* vabal_<type>: three-operand wrappers over the __builtin_neon_vabal
   builtins.  The builtin mode names the narrow operands (v8qi/v4hi/v2si)
   while the result is cast to a vector with lanes twice as wide.
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vabal_s8(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vabalv8qi (__a, __b, __c, 1))
#define vabal_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vabalv4hi (__a, __b, __c, 1))
#define vabal_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vabalv2si (__a, __b, __c, 1))
#define vabal_u8(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vabalv8qi (__a, __b, __c, 0))
#define vabal_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vabalv4hi (__a, __b, __c, 0))
#define vabal_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vabalv2si (__a, __b, __c, 0))
/* vmax_<type>: wrappers over the __builtin_neon_vmax builtins for the D
   vectors (lane-wise maximum per the ARM NEON reference).  The result is
   cast to the vector type named by the suffix.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vmax_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vmaxv8qi (__a, __b, 1))
#define vmax_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vmaxv4hi (__a, __b, 1))
#define vmax_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vmaxv2si (__a, __b, 1))
#define vmax_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vmaxv2sf (__a, __b, 5))
#define vmax_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vmaxv8qi (__a, __b, 0))
#define vmax_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vmaxv4hi (__a, __b, 0))
#define vmax_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vmaxv2si (__a, __b, 0))
/* vmaxq_<type>: wrappers over the __builtin_neon_vmax builtins for the Q
   vectors.  The result is cast to the vector type named by the suffix.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vmaxq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vmaxv16qi (__a, __b, 1))
#define vmaxq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vmaxv8hi (__a, __b, 1))
#define vmaxq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vmaxv4si (__a, __b, 1))
#define vmaxq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vmaxv4sf (__a, __b, 5))
#define vmaxq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vmaxv16qi (__a, __b, 0))
#define vmaxq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vmaxv8hi (__a, __b, 0))
#define vmaxq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmaxv4si (__a, __b, 0))
/* vmin_<type>: wrappers over the __builtin_neon_vmin builtins for the D
   vectors (lane-wise minimum per the ARM NEON reference).  The result is
   cast to the vector type named by the suffix.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vmin_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vminv8qi (__a, __b, 1))
#define vmin_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vminv4hi (__a, __b, 1))
#define vmin_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vminv2si (__a, __b, 1))
#define vmin_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vminv2sf (__a, __b, 5))
#define vmin_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vminv8qi (__a, __b, 0))
#define vmin_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vminv4hi (__a, __b, 0))
#define vmin_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vminv2si (__a, __b, 0))
/* vminq_<type>: wrappers over the __builtin_neon_vmin builtins for the Q
   vectors.  The result is cast to the vector type named by the suffix.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vminq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vminv16qi (__a, __b, 1))
#define vminq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vminv8hi (__a, __b, 1))
#define vminq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vminv4si (__a, __b, 1))
#define vminq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vminv4sf (__a, __b, 5))
#define vminq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vminv16qi (__a, __b, 0))
#define vminq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vminv8hi (__a, __b, 0))
#define vminq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vminv4si (__a, __b, 0))
/* vpadd_<type>: wrappers over the __builtin_neon_vpadd builtins
   (pairwise add per the ARM NEON reference).  Only D-vector forms are
   defined here.  The result is cast to the type named by the suffix.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vpadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vpaddv8qi (__a, __b, 1))
#define vpadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vpaddv4hi (__a, __b, 1))
#define vpadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vpaddv2si (__a, __b, 1))
#define vpadd_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vpaddv2sf (__a, __b, 5))
#define vpadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vpaddv8qi (__a, __b, 0))
#define vpadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vpaddv4hi (__a, __b, 0))
#define vpadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vpaddv2si (__a, __b, 0))
/* vpaddl_<type>: single-operand wrappers over the __builtin_neon_vpaddl
   builtins for the D vectors.  The result type has half as many lanes,
   each twice as wide, as the operand mode (v8qi -> x4 16-bit, v4hi -> x2
   32-bit, v2si -> x1 64-bit).
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vpaddl_s8(__a) \
  ((int16x4_t)__builtin_neon_vpaddlv8qi (__a, 1))
#define vpaddl_s16(__a) \
  ((int32x2_t)__builtin_neon_vpaddlv4hi (__a, 1))
#define vpaddl_s32(__a) \
  ((int64x1_t)__builtin_neon_vpaddlv2si (__a, 1))
#define vpaddl_u8(__a) \
  ((uint16x4_t)__builtin_neon_vpaddlv8qi (__a, 0))
#define vpaddl_u16(__a) \
  ((uint32x2_t)__builtin_neon_vpaddlv4hi (__a, 0))
#define vpaddl_u32(__a) \
  ((uint64x1_t)__builtin_neon_vpaddlv2si (__a, 0))
/* vpaddlq_<type>: single-operand wrappers over the __builtin_neon_vpaddl
   builtins for the Q vectors.  The result type has half as many lanes,
   each twice as wide, as the operand mode (v16qi -> x8 16-bit, v8hi -> x4
   32-bit, v4si -> x2 64-bit).
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vpaddlq_s8(__a) \
  ((int16x8_t)__builtin_neon_vpaddlv16qi (__a, 1))
#define vpaddlq_s16(__a) \
  ((int32x4_t)__builtin_neon_vpaddlv8hi (__a, 1))
#define vpaddlq_s32(__a) \
  ((int64x2_t)__builtin_neon_vpaddlv4si (__a, 1))
#define vpaddlq_u8(__a) \
  ((uint16x8_t)__builtin_neon_vpaddlv16qi (__a, 0))
#define vpaddlq_u16(__a) \
  ((uint32x4_t)__builtin_neon_vpaddlv8hi (__a, 0))
#define vpaddlq_u32(__a) \
  ((uint64x2_t)__builtin_neon_vpaddlv4si (__a, 0))
/* vpadal_<type>: two-operand wrappers over the __builtin_neon_vpadal
   builtins for the D vectors.  The builtin mode (v8qi/v4hi/v2si) is the
   narrow one; the result is cast to the wider-lane type (x4 16-bit, x2
   32-bit, x1 64-bit).  NOTE(review): __a is presumably the wide
   accumulator and __b the narrow input; confirm against the ARM reference.
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vpadal_s8(__a, __b) \
  ((int16x4_t)__builtin_neon_vpadalv8qi (__a, __b, 1))
#define vpadal_s16(__a, __b) \
  ((int32x2_t)__builtin_neon_vpadalv4hi (__a, __b, 1))
#define vpadal_s32(__a, __b) \
  ((int64x1_t)__builtin_neon_vpadalv2si (__a, __b, 1))
#define vpadal_u8(__a, __b) \
  ((uint16x4_t)__builtin_neon_vpadalv8qi (__a, __b, 0))
#define vpadal_u16(__a, __b) \
  ((uint32x2_t)__builtin_neon_vpadalv4hi (__a, __b, 0))
#define vpadal_u32(__a, __b) \
  ((uint64x1_t)__builtin_neon_vpadalv2si (__a, __b, 0))
/* vpadalq_<type>: two-operand wrappers over the __builtin_neon_vpadal
   builtins for the Q vectors.  The builtin mode (v16qi/v8hi/v4si) is the
   narrow one; the result is cast to the wider-lane type (x8 16-bit, x4
   32-bit, x2 64-bit).
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vpadalq_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vpadalv16qi (__a, __b, 1))
#define vpadalq_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vpadalv8hi (__a, __b, 1))
#define vpadalq_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vpadalv4si (__a, __b, 1))
#define vpadalq_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vpadalv16qi (__a, __b, 0))
#define vpadalq_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vpadalv8hi (__a, __b, 0))
#define vpadalq_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vpadalv4si (__a, __b, 0))
/* vpmax_<type>: wrappers over the __builtin_neon_vpmax builtins
   (pairwise maximum per the ARM NEON reference).  Only D-vector forms are
   defined here.  The result is cast to the type named by the suffix.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vpmax_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vpmaxv8qi (__a, __b, 1))
#define vpmax_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vpmaxv4hi (__a, __b, 1))
#define vpmax_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vpmaxv2si (__a, __b, 1))
#define vpmax_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vpmaxv2sf (__a, __b, 5))
#define vpmax_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vpmaxv8qi (__a, __b, 0))
#define vpmax_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vpmaxv4hi (__a, __b, 0))
#define vpmax_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vpmaxv2si (__a, __b, 0))
/* vpmin_<type>: wrappers over the __builtin_neon_vpmin builtins
   (pairwise minimum per the ARM NEON reference).  Only D-vector forms are
   defined here.  The result is cast to the type named by the suffix.
   Last argument as used here: 1 for _s*, 0 for _u*, 5 for _f32.
   Expansions are parenthesized so each use is one primary expression.  */
#define vpmin_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vpminv8qi (__a, __b, 1))
#define vpmin_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vpminv4hi (__a, __b, 1))
#define vpmin_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vpminv2si (__a, __b, 1))
#define vpmin_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vpminv2sf (__a, __b, 5))
#define vpmin_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vpminv8qi (__a, __b, 0))
#define vpmin_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vpminv4hi (__a, __b, 0))
#define vpmin_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vpminv2si (__a, __b, 0))
/* vrecps/vrsqrts (and their q forms): float-only two-operand wrappers
   over the __builtin_neon_vrecps and __builtin_neon_vrsqrts builtins
   (presumably the reciprocal and reciprocal-square-root step operations;
   see the ARM reference).  All pass 5 as the last argument and return a
   float vector of the same shape as the operands' mode.
   Expansions are parenthesized so each use is one primary expression.  */
#define vrecps_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vrecpsv2sf (__a, __b, 5))
#define vrecpsq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vrecpsv4sf (__a, __b, 5))
#define vrsqrts_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vrsqrtsv2sf (__a, __b, 5))
#define vrsqrtsq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vrsqrtsv4sf (__a, __b, 5))
/* vshl_<type>: two-operand wrappers over the __builtin_neon_vshl builtins
   for the D vectors, including the single-lane 64-bit (v1di) form.
   NOTE(review): __b is presumably a vector of per-lane shift counts;
   confirm against the ARM reference.
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vshl_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 1))
#define vshl_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 1))
#define vshl_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshlv2si (__a, __b, 1))
#define vshl_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshlv1di (__a, __b, 1))
#define vshl_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshlv8qi (__a, __b, 0))
#define vshl_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshlv4hi (__a, __b, 0))
#define vshl_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshlv2si (__a, __b, 0))
#define vshl_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshlv1di (__a, __b, 0))
/* vshlq_<type>: two-operand wrappers over the __builtin_neon_vshl
   builtins for the Q vectors (v16qi/v8hi/v4si/v2di modes).
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vshlq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 1))
#define vshlq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 1))
#define vshlq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshlv4si (__a, __b, 1))
#define vshlq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshlv2di (__a, __b, 1))
#define vshlq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshlv16qi (__a, __b, 0))
#define vshlq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshlv8hi (__a, __b, 0))
#define vshlq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshlv4si (__a, __b, 0))
#define vshlq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshlv2di (__a, __b, 0))
/* vrshl_<type>: D-vector wrappers that reuse the __builtin_neon_vshl
   builtins; the only difference from the vshl_<type> macros is the last
   argument, which is 3 for _s* and 2 for _u* (rather than 1 and 0).
   NOTE(review): the extra bit presumably selects rounding; confirm.
   Expansions are parenthesized so each use is one primary expression.  */
#define vrshl_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 3))
#define vrshl_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 3))
#define vrshl_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshlv2si (__a, __b, 3))
#define vrshl_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshlv1di (__a, __b, 3))
#define vrshl_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshlv8qi (__a, __b, 2))
#define vrshl_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshlv4hi (__a, __b, 2))
#define vrshl_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshlv2si (__a, __b, 2))
#define vrshl_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshlv1di (__a, __b, 2))
/* vrshlq_<type>: Q-vector wrappers that reuse the __builtin_neon_vshl
   builtins with 3 (for _s*) or 2 (for _u*) as the last argument, where
   the vshlq_<type> macros pass 1 or 0.
   Expansions are parenthesized so each use is one primary expression.  */
#define vrshlq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 3))
#define vrshlq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 3))
#define vrshlq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshlv4si (__a, __b, 3))
#define vrshlq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshlv2di (__a, __b, 3))
#define vrshlq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshlv16qi (__a, __b, 2))
#define vrshlq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshlv8hi (__a, __b, 2))
#define vrshlq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshlv4si (__a, __b, 2))
#define vrshlq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshlv2di (__a, __b, 2))
/* vqshl_<type>: two-operand wrappers over the __builtin_neon_vqshl
   builtins for the D vectors (presumably the saturating form of vshl;
   see the ARM reference).
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqshl_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 1))
#define vqshl_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 1))
#define vqshl_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 1))
#define vqshl_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqshlv1di (__a, __b, 1))
#define vqshl_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 0))
#define vqshl_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 0))
#define vqshl_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshlv2si (__a, __b, 0))
#define vqshl_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqshlv1di (__a, __b, 0))
/* vqshlq_<type>: two-operand wrappers over the __builtin_neon_vqshl
   builtins for the Q vectors (v16qi/v8hi/v4si/v2di modes).
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqshlq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 1))
#define vqshlq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 1))
#define vqshlq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 1))
#define vqshlq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 1))
#define vqshlq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 0))
#define vqshlq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 0))
#define vqshlq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqshlv4si (__a, __b, 0))
#define vqshlq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqshlv2di (__a, __b, 0))
/* vqrshl_<type>: D-vector wrappers that reuse the __builtin_neon_vqshl
   builtins; the only difference from the vqshl_<type> macros is the last
   argument, which is 3 for _s* and 2 for _u* (rather than 1 and 0).
   Expansions are parenthesized so each use is one primary expression.  */
#define vqrshl_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 3))
#define vqrshl_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 3))
#define vqrshl_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 3))
#define vqrshl_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqshlv1di (__a, __b, 3))
#define vqrshl_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 2))
#define vqrshl_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 2))
#define vqrshl_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshlv2si (__a, __b, 2))
#define vqrshl_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqshlv1di (__a, __b, 2))
/* vqrshlq_<type>: Q-vector wrappers that reuse the __builtin_neon_vqshl
   builtins with 3 (for _s*) or 2 (for _u*) as the last argument, where
   the vqshlq_<type> macros pass 1 or 0.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqrshlq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 3))
#define vqrshlq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 3))
#define vqrshlq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 3))
#define vqrshlq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 3))
#define vqrshlq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 2))
#define vqrshlq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 2))
#define vqrshlq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqshlv4si (__a, __b, 2))
#define vqrshlq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqshlv2di (__a, __b, 2))
/* vshr_n_<type>: wrappers over the __builtin_neon_vshr_n builtins for the
   D vectors.  __a is the vector and __b is forwarded as the second
   argument (presumably an immediate shift count, per the _n suffix;
   confirm against the ARM reference).
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vshr_n_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 1))
#define vshr_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 1))
#define vshr_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 1))
#define vshr_n_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 1))
#define vshr_n_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 0))
#define vshr_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 0))
#define vshr_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 0))
#define vshr_n_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 0))
/* vshrq_n_<type>: wrappers over the __builtin_neon_vshr_n builtins for
   the Q vectors (v16qi/v8hi/v4si/v2di modes).  __b is forwarded as the
   second argument.  Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vshrq_n_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 1))
#define vshrq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1))
#define vshrq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 1))
#define vshrq_n_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 1))
#define vshrq_n_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 0))
#define vshrq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 0))
#define vshrq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 0))
#define vshrq_n_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 0))
/* vrshr_n_<type>: D-vector wrappers that reuse the __builtin_neon_vshr_n
   builtins; the only difference from the vshr_n_<type> macros is the last
   argument, which is 3 for _s* and 2 for _u* (rather than 1 and 0).
   Expansions are parenthesized so each use is one primary expression.  */
#define vrshr_n_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 3))
#define vrshr_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 3))
#define vrshr_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 3))
#define vrshr_n_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 3))
#define vrshr_n_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 2))
#define vrshr_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 2))
#define vrshr_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 2))
#define vrshr_n_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 2))
/* vrshrq_n_<type>: Q-vector wrappers that reuse the __builtin_neon_vshr_n
   builtins with 3 (for _s*) or 2 (for _u*) as the last argument, where
   the vshrq_n_<type> macros pass 1 or 0.
   Expansions are parenthesized so each use is one primary expression.  */
#define vrshrq_n_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 3))
#define vrshrq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 3))
#define vrshrq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 3))
#define vrshrq_n_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 3))
#define vrshrq_n_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 2))
#define vrshrq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 2))
#define vrshrq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 2))
#define vrshrq_n_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 2))
/* vshrn_n_<type>: wrappers over the __builtin_neon_vshrn_n builtins.  The
   macro suffix and builtin mode (v8hi/v4si/v2di) name the wide operand;
   the result is cast to a vector with the same lane count and lanes half
   as wide.  __b is forwarded as the second argument.
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vshrn_n_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 1))
#define vshrn_n_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 1))
#define vshrn_n_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 1))
#define vshrn_n_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 0))
#define vshrn_n_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 0))
#define vshrn_n_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 0))
/* vrshrn_n_<type>: wrappers that reuse the __builtin_neon_vshrn_n
   builtins; the only difference from the vshrn_n_<type> macros is the
   last argument, which is 3 for _s* and 2 for _u* (rather than 1 and 0).
   Expansions are parenthesized so each use is one primary expression.  */
#define vrshrn_n_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 3))
#define vrshrn_n_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 3))
#define vrshrn_n_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 3))
#define vrshrn_n_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 2))
#define vrshrn_n_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 2))
#define vrshrn_n_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 2))
/* vqshrn_n_<type>: wrappers over the __builtin_neon_vqshrn_n builtins
   (presumably the saturating form of vshrn_n; see the ARM reference).
   The builtin mode names the wide operand; the result is cast to a vector
   with lanes half as wide.  Last argument: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqshrn_n_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 1))
#define vqshrn_n_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 1))
#define vqshrn_n_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 1))
#define vqshrn_n_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 0))
#define vqshrn_n_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 0))
#define vqshrn_n_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 0))
/* vqrshrn_n_<type>: wrappers that reuse the __builtin_neon_vqshrn_n
   builtins; the only difference from the vqshrn_n_<type> macros is the
   last argument, which is 3 for _s* and 2 for _u* (rather than 1 and 0).
   Expansions are parenthesized so each use is one primary expression.  */
#define vqrshrn_n_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 3))
#define vqrshrn_n_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 3))
#define vqrshrn_n_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 3))
#define vqrshrn_n_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 2))
#define vqrshrn_n_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 2))
#define vqrshrn_n_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 2))
/* vqshrun_n_<type>: wrappers over the __builtin_neon_vqshrun_n builtins.
   Only signed-suffixed forms exist, yet the result is cast to an UNSIGNED
   vector with lanes half as wide as the operand mode (v8hi/v4si/v2di).
   All pass 1 as the last argument.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqshrun_n_s16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 1))
#define vqshrun_n_s32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 1))
#define vqshrun_n_s64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 1))
/* vqrshrun_n_<type>: wrappers that reuse the __builtin_neon_vqshrun_n
   builtins with 3 as the last argument, where the vqshrun_n_<type>
   macros pass 1.  Results are cast to unsigned vectors with lanes half as
   wide as the operand mode.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqrshrun_n_s16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 3))
#define vqrshrun_n_s32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 3))
#define vqrshrun_n_s64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 3))
/* vshl_n_<type>: wrappers over the __builtin_neon_vshl_n builtins for the
   D vectors.  __a is the vector and __b is forwarded as the second
   argument (presumably an immediate shift count, per the _n suffix;
   confirm against the ARM reference).
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vshl_n_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b, 1))
#define vshl_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b, 1))
#define vshl_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshl_nv2si (__a, __b, 1))
#define vshl_n_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshl_nv1di (__a, __b, 1))
#define vshl_n_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshl_nv8qi (__a, __b, 0))
#define vshl_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshl_nv4hi (__a, __b, 0))
#define vshl_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshl_nv2si (__a, __b, 0))
#define vshl_n_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshl_nv1di (__a, __b, 0))
/* vshlq_n_<type>: wrappers over the __builtin_neon_vshl_n builtins for
   the Q vectors (v16qi/v8hi/v4si/v2di modes).  __b is forwarded as the
   second argument.  Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vshlq_n_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b, 1))
#define vshlq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1))
#define vshlq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshl_nv4si (__a, __b, 1))
#define vshlq_n_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshl_nv2di (__a, __b, 1))
#define vshlq_n_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshl_nv16qi (__a, __b, 0))
#define vshlq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 0))
#define vshlq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshl_nv4si (__a, __b, 0))
#define vshlq_n_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshl_nv2di (__a, __b, 0))
/* vqshl_n_<type>: wrappers over the __builtin_neon_vqshl_n builtins for
   the D vectors (presumably the saturating form of vshl_n; see the ARM
   reference).  __b is forwarded as the second argument.
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqshl_n_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshl_nv8qi (__a, __b, 1))
#define vqshl_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshl_nv4hi (__a, __b, 1))
#define vqshl_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshl_nv2si (__a, __b, 1))
#define vqshl_n_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqshl_nv1di (__a, __b, 1))
#define vqshl_n_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshl_nv8qi (__a, __b, 0))
#define vqshl_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshl_nv4hi (__a, __b, 0))
#define vqshl_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshl_nv2si (__a, __b, 0))
#define vqshl_n_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqshl_nv1di (__a, __b, 0))
/* vqshlq_n_<type>: wrappers over the __builtin_neon_vqshl_n builtins for
   the Q vectors (v16qi/v8hi/v4si/v2di modes).  __b is forwarded as the
   second argument.  Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqshlq_n_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqshl_nv16qi (__a, __b, 1))
#define vqshlq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqshl_nv8hi (__a, __b, 1))
#define vqshlq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqshl_nv4si (__a, __b, 1))
#define vqshlq_n_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqshl_nv2di (__a, __b, 1))
#define vqshlq_n_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqshl_nv16qi (__a, __b, 0))
#define vqshlq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqshl_nv8hi (__a, __b, 0))
#define vqshlq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqshl_nv4si (__a, __b, 0))
#define vqshlq_n_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqshl_nv2di (__a, __b, 0))
/* vqshlu_n_<type>: wrappers over the __builtin_neon_vqshlu_n builtins for
   the D vectors.  Only signed-suffixed forms exist, yet the result is
   cast to the UNSIGNED vector type with the same lane layout.
   All pass 1 as the last argument.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqshlu_n_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshlu_nv8qi (__a, __b, 1))
#define vqshlu_n_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshlu_nv4hi (__a, __b, 1))
#define vqshlu_n_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshlu_nv2si (__a, __b, 1))
#define vqshlu_n_s64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqshlu_nv1di (__a, __b, 1))
/* vqshluq_n_<type>: wrappers over the __builtin_neon_vqshlu_n builtins
   for the Q vectors.  Only signed-suffixed forms exist; the result is
   cast to the unsigned vector type with the same lane layout.
   All pass 1 as the last argument.
   Expansions are parenthesized so each use is one primary expression.  */
#define vqshluq_n_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqshlu_nv16qi (__a, __b, 1))
#define vqshluq_n_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqshlu_nv8hi (__a, __b, 1))
#define vqshluq_n_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqshlu_nv4si (__a, __b, 1))
#define vqshluq_n_s64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqshlu_nv2di (__a, __b, 1))
/* vshll_n_<type>: wrappers over the __builtin_neon_vshll_n builtins.  The
   builtin mode names the D-sized operand (v8qi/v4hi/v2si) while the
   result is cast to a vector with the same lane count and lanes twice as
   wide.  __b is forwarded as the second argument.
   Last argument as used here: 1 for _s*, 0 for _u*.
   Expansions are parenthesized so each use is one primary expression.  */
#define vshll_n_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vshll_nv8qi (__a, __b, 1))
#define vshll_n_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vshll_nv4hi (__a, __b, 1))
#define vshll_n_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vshll_nv2si (__a, __b, 1))
#define vshll_n_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshll_nv8qi (__a, __b, 0))
#define vshll_n_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshll_nv4hi (__a, __b, 0))
#define vshll_n_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshll_nv2si (__a, __b, 0))
/* vsra_n_<type> (__a, __b, __c): d-register wrappers around
   __builtin_neon_vsra_n<mode>.  All three arguments are forwarded
   unchanged; the trailing constant is 1 for signed and 0 for unsigned
   wrappers.  */
#define vsra_n_s8(__a, __b, __c) \
  (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 1)
#define vsra_n_s16(__a, __b, __c) \
  (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 1)
#define vsra_n_s32(__a, __b, __c) \
  (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 1)
#define vsra_n_s64(__a, __b, __c) \
  (int64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 1)
#define vsra_n_u8(__a, __b, __c) \
  (uint8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 0)
#define vsra_n_u16(__a, __b, __c) \
  (uint16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 0)
#define vsra_n_u32(__a, __b, __c) \
  (uint32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 0)
#define vsra_n_u64(__a, __b, __c) \
  (uint64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 0)
/* vsraq_n_<type> (__a, __b, __c): q-register wrappers around
   __builtin_neon_vsra_n<mode>.  Trailing constant: 1 for signed, 0 for
   unsigned.  */
#define vsraq_n_s8(__a, __b, __c) \
  (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 1)
#define vsraq_n_s16(__a, __b, __c) \
  (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 1)
#define vsraq_n_s32(__a, __b, __c) \
  (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 1)
#define vsraq_n_s64(__a, __b, __c) \
  (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 1)
#define vsraq_n_u8(__a, __b, __c) \
  (uint8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 0)
#define vsraq_n_u16(__a, __b, __c) \
  (uint16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 0)
#define vsraq_n_u32(__a, __b, __c) \
  (uint32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 0)
#define vsraq_n_u64(__a, __b, __c) \
  (uint64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 0)
/* vrsra_n_<type> (__a, __b, __c): d-register wrappers that reuse the
   __builtin_neon_vsra_n<mode> builtins (there is no separate vrsra
   builtin in this file) but pass a different trailing constant: 3 for the
   signed wrappers and 2 for the unsigned ones, versus 1 and 0 in the plain
   vsra_n_* wrappers.
   NOTE(review): bit 1 of that constant presumably selects the rounding
   form -- confirm against the builtin's definition in the compiler.  */
#define vrsra_n_s8(__a, __b, __c) \
  (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 3)
#define vrsra_n_s16(__a, __b, __c) \
  (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 3)
#define vrsra_n_s32(__a, __b, __c) \
  (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 3)
#define vrsra_n_s64(__a, __b, __c) \
  (int64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 3)
#define vrsra_n_u8(__a, __b, __c) \
  (uint8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 2)
#define vrsra_n_u16(__a, __b, __c) \
  (uint16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 2)
#define vrsra_n_u32(__a, __b, __c) \
  (uint32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 2)
#define vrsra_n_u64(__a, __b, __c) \
  (uint64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 2)
/* vrsraq_n_<type> (__a, __b, __c): q-register wrappers that call the
   __builtin_neon_vsra_n<mode> builtins with a trailing constant of 3
   (signed) or 2 (unsigned), mirroring the d-register vrsra_n_* set.  */
#define vrsraq_n_s8(__a, __b, __c) \
  (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 3)
#define vrsraq_n_s16(__a, __b, __c) \
  (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 3)
#define vrsraq_n_s32(__a, __b, __c) \
  (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 3)
#define vrsraq_n_s64(__a, __b, __c) \
  (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 3)
#define vrsraq_n_u8(__a, __b, __c) \
  (uint8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 2)
#define vrsraq_n_u16(__a, __b, __c) \
  (uint16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 2)
#define vrsraq_n_u32(__a, __b, __c) \
  (uint32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 2)
#define vrsraq_n_u64(__a, __b, __c) \
  (uint64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 2)
/* vsri_n_<type> (__a, __b, __c): d-register wrappers around
   __builtin_neon_vsri_n<mode>.  No variant constant is passed: signed,
   unsigned and polynomial wrappers of the same element width share one
   builtin and differ only in the result cast.  */
#define vsri_n_s8(__a, __b, __c) \
  (int8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c)
#define vsri_n_s16(__a, __b, __c) \
  (int16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c)
#define vsri_n_s32(__a, __b, __c) \
  (int32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c)
#define vsri_n_s64(__a, __b, __c) \
  (int64x1_t)__builtin_neon_vsri_nv1di (__a, __b, __c)
#define vsri_n_u8(__a, __b, __c) \
  (uint8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c)
#define vsri_n_u16(__a, __b, __c) \
  (uint16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c)
#define vsri_n_u32(__a, __b, __c) \
  (uint32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c)
#define vsri_n_u64(__a, __b, __c) \
  (uint64x1_t)__builtin_neon_vsri_nv1di (__a, __b, __c)
#define vsri_n_p8(__a, __b, __c) \
  (poly8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c)
#define vsri_n_p16(__a, __b, __c) \
  (poly16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c)
/* vsriq_n_<type> (__a, __b, __c): q-register wrappers around
   __builtin_neon_vsri_n<mode>; the wrappers for one element width differ
   only in the result cast.  */
#define vsriq_n_s8(__a, __b, __c) \
  (int8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c)
#define vsriq_n_s16(__a, __b, __c) \
  (int16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c)
#define vsriq_n_s32(__a, __b, __c) \
  (int32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c)
#define vsriq_n_s64(__a, __b, __c) \
  (int64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c)
#define vsriq_n_u8(__a, __b, __c) \
  (uint8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c)
#define vsriq_n_u16(__a, __b, __c) \
  (uint16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c)
#define vsriq_n_u32(__a, __b, __c) \
  (uint32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c)
#define vsriq_n_u64(__a, __b, __c) \
  (uint64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c)
#define vsriq_n_p8(__a, __b, __c) \
  (poly8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c)
#define vsriq_n_p16(__a, __b, __c) \
  (poly16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c)
/* vsli_n_<type> (__a, __b, __c): d-register wrappers around
   __builtin_neon_vsli_n<mode>.  As with vsri_n_*, no variant constant is
   passed and the wrappers for one element width differ only in the result
   cast.  */
#define vsli_n_s8(__a, __b, __c) \
  (int8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c)
#define vsli_n_s16(__a, __b, __c) \
  (int16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c)
#define vsli_n_s32(__a, __b, __c) \
  (int32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c)
#define vsli_n_s64(__a, __b, __c) \
  (int64x1_t)__builtin_neon_vsli_nv1di (__a, __b, __c)
#define vsli_n_u8(__a, __b, __c) \
  (uint8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c)
#define vsli_n_u16(__a, __b, __c) \
  (uint16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c)
#define vsli_n_u32(__a, __b, __c) \
  (uint32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c)
#define vsli_n_u64(__a, __b, __c) \
  (uint64x1_t)__builtin_neon_vsli_nv1di (__a, __b, __c)
#define vsli_n_p8(__a, __b, __c) \
  (poly8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c)
#define vsli_n_p16(__a, __b, __c) \
  (poly16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c)
/* vsliq_n_<type> (__a, __b, __c): q-register wrappers around
   __builtin_neon_vsli_n<mode>; the wrappers for one element width differ
   only in the result cast.  */
#define vsliq_n_s8(__a, __b, __c) \
  (int8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c)
#define vsliq_n_s16(__a, __b, __c) \
  (int16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c)
#define vsliq_n_s32(__a, __b, __c) \
  (int32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c)
#define vsliq_n_s64(__a, __b, __c) \
  (int64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c)
#define vsliq_n_u8(__a, __b, __c) \
  (uint8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c)
#define vsliq_n_u16(__a, __b, __c) \
  (uint16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c)
#define vsliq_n_u32(__a, __b, __c) \
  (uint32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c)
#define vsliq_n_u64(__a, __b, __c) \
  (uint64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c)
#define vsliq_n_p8(__a, __b, __c) \
  (poly8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c)
#define vsliq_n_p16(__a, __b, __c) \
  (poly16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c)
/* vabs_<type> / vabsq_<type> (__a): one-operand wrappers around
   __builtin_neon_vabs<mode>.  The trailing constant is 1 for the signed
   integer wrappers and 5 for the float32 ones.  */
#define vabs_s8(__a) \
  (int8x8_t)__builtin_neon_vabsv8qi (__a, 1)
#define vabs_s16(__a) \
  (int16x4_t)__builtin_neon_vabsv4hi (__a, 1)
#define vabs_s32(__a) \
  (int32x2_t)__builtin_neon_vabsv2si (__a, 1)
#define vabs_f32(__a) \
  (float32x2_t)__builtin_neon_vabsv2sf (__a, 5)
#define vabsq_s8(__a) \
  (int8x16_t)__builtin_neon_vabsv16qi (__a, 1)
#define vabsq_s16(__a) \
  (int16x8_t)__builtin_neon_vabsv8hi (__a, 1)
#define vabsq_s32(__a) \
  (int32x4_t)__builtin_neon_vabsv4si (__a, 1)
#define vabsq_f32(__a) \
  (float32x4_t)__builtin_neon_vabsv4sf (__a, 5)
/* vqabs_s<N> / vqabsq_s<N> (__a): one-operand wrappers around
   __builtin_neon_vqabs<mode>.  Only signed integer variants are defined and
   the trailing constant is always 1.  */
#define vqabs_s8(__a) \
  (int8x8_t)__builtin_neon_vqabsv8qi (__a, 1)
#define vqabs_s16(__a) \
  (int16x4_t)__builtin_neon_vqabsv4hi (__a, 1)
#define vqabs_s32(__a) \
  (int32x2_t)__builtin_neon_vqabsv2si (__a, 1)
#define vqabsq_s8(__a) \
  (int8x16_t)__builtin_neon_vqabsv16qi (__a, 1)
#define vqabsq_s16(__a) \
  (int16x8_t)__builtin_neon_vqabsv8hi (__a, 1)
#define vqabsq_s32(__a) \
  (int32x4_t)__builtin_neon_vqabsv4si (__a, 1)
/* vneg_<type> / vnegq_<type> (__a): one-operand wrappers around
   __builtin_neon_vneg<mode>.  The trailing constant is 1 for the signed
   integer wrappers and 5 for the float32 ones.  */
#define vneg_s8(__a) \
  (int8x8_t)__builtin_neon_vnegv8qi (__a, 1)
#define vneg_s16(__a) \
  (int16x4_t)__builtin_neon_vnegv4hi (__a, 1)
#define vneg_s32(__a) \
  (int32x2_t)__builtin_neon_vnegv2si (__a, 1)
#define vneg_f32(__a) \
  (float32x2_t)__builtin_neon_vnegv2sf (__a, 5)
#define vnegq_s8(__a) \
  (int8x16_t)__builtin_neon_vnegv16qi (__a, 1)
#define vnegq_s16(__a) \
  (int16x8_t)__builtin_neon_vnegv8hi (__a, 1)
#define vnegq_s32(__a) \
  (int32x4_t)__builtin_neon_vnegv4si (__a, 1)
#define vnegq_f32(__a) \
  (float32x4_t)__builtin_neon_vnegv4sf (__a, 5)
/* vqneg_s<N> / vqnegq_s<N> (__a): one-operand wrappers around
   __builtin_neon_vqneg<mode>.  Only signed integer variants are defined and
   the trailing constant is always 1.  */
#define vqneg_s8(__a) \
  (int8x8_t)__builtin_neon_vqnegv8qi (__a, 1)
#define vqneg_s16(__a) \
  (int16x4_t)__builtin_neon_vqnegv4hi (__a, 1)
#define vqneg_s32(__a) \
  (int32x2_t)__builtin_neon_vqnegv2si (__a, 1)
#define vqnegq_s8(__a) \
  (int8x16_t)__builtin_neon_vqnegv16qi (__a, 1)
#define vqnegq_s16(__a) \
  (int16x8_t)__builtin_neon_vqnegv8hi (__a, 1)
#define vqnegq_s32(__a) \
  (int32x4_t)__builtin_neon_vqnegv4si (__a, 1)
/* vmvn_<type> / vmvnq_<type> (__a): one-operand wrappers around
   __builtin_neon_vmvn<mode>.  The trailing constant is 1 for signed, 0 for
   unsigned and 4 for polynomial (_p8) wrappers.  */
#define vmvn_s8(__a) \
  (int8x8_t)__builtin_neon_vmvnv8qi (__a, 1)
#define vmvn_s16(__a) \
  (int16x4_t)__builtin_neon_vmvnv4hi (__a, 1)
#define vmvn_s32(__a) \
  (int32x2_t)__builtin_neon_vmvnv2si (__a, 1)
#define vmvn_u8(__a) \
  (uint8x8_t)__builtin_neon_vmvnv8qi (__a, 0)
#define vmvn_u16(__a) \
  (uint16x4_t)__builtin_neon_vmvnv4hi (__a, 0)
#define vmvn_u32(__a) \
  (uint32x2_t)__builtin_neon_vmvnv2si (__a, 0)
#define vmvn_p8(__a) \
  (poly8x8_t)__builtin_neon_vmvnv8qi (__a, 4)
#define vmvnq_s8(__a) \
  (int8x16_t)__builtin_neon_vmvnv16qi (__a, 1)
#define vmvnq_s16(__a) \
  (int16x8_t)__builtin_neon_vmvnv8hi (__a, 1)
#define vmvnq_s32(__a) \
  (int32x4_t)__builtin_neon_vmvnv4si (__a, 1)
#define vmvnq_u8(__a) \
  (uint8x16_t)__builtin_neon_vmvnv16qi (__a, 0)
#define vmvnq_u16(__a) \
  (uint16x8_t)__builtin_neon_vmvnv8hi (__a, 0)
#define vmvnq_u32(__a) \
  (uint32x4_t)__builtin_neon_vmvnv4si (__a, 0)
#define vmvnq_p8(__a) \
  (poly8x16_t)__builtin_neon_vmvnv16qi (__a, 4)
/* vcls_s<N> / vclsq_s<N> (__a): one-operand wrappers around
   __builtin_neon_vcls<mode>.  Only signed variants are defined; the
   trailing constant is always 1.  */
#define vcls_s8(__a) \
  (int8x8_t)__builtin_neon_vclsv8qi (__a, 1)
#define vcls_s16(__a) \
  (int16x4_t)__builtin_neon_vclsv4hi (__a, 1)
#define vcls_s32(__a) \
  (int32x2_t)__builtin_neon_vclsv2si (__a, 1)
#define vclsq_s8(__a) \
  (int8x16_t)__builtin_neon_vclsv16qi (__a, 1)
#define vclsq_s16(__a) \
  (int16x8_t)__builtin_neon_vclsv8hi (__a, 1)
#define vclsq_s32(__a) \
  (int32x4_t)__builtin_neon_vclsv4si (__a, 1)
/* vclz_<type> / vclzq_<type> (__a): one-operand wrappers around
   __builtin_neon_vclz<mode>.  The trailing constant is 1 for signed and 0
   for unsigned wrappers.  */
#define vclz_s8(__a) \
  (int8x8_t)__builtin_neon_vclzv8qi (__a, 1)
#define vclz_s16(__a) \
  (int16x4_t)__builtin_neon_vclzv4hi (__a, 1)
#define vclz_s32(__a) \
  (int32x2_t)__builtin_neon_vclzv2si (__a, 1)
#define vclz_u8(__a) \
  (uint8x8_t)__builtin_neon_vclzv8qi (__a, 0)
#define vclz_u16(__a) \
  (uint16x4_t)__builtin_neon_vclzv4hi (__a, 0)
#define vclz_u32(__a) \
  (uint32x2_t)__builtin_neon_vclzv2si (__a, 0)
#define vclzq_s8(__a) \
  (int8x16_t)__builtin_neon_vclzv16qi (__a, 1)
#define vclzq_s16(__a) \
  (int16x8_t)__builtin_neon_vclzv8hi (__a, 1)
#define vclzq_s32(__a) \
  (int32x4_t)__builtin_neon_vclzv4si (__a, 1)
#define vclzq_u8(__a) \
  (uint8x16_t)__builtin_neon_vclzv16qi (__a, 0)
#define vclzq_u16(__a) \
  (uint16x8_t)__builtin_neon_vclzv8hi (__a, 0)
#define vclzq_u32(__a) \
  (uint32x4_t)__builtin_neon_vclzv4si (__a, 0)
/* vcnt_<type> / vcntq_<type> (__a): one-operand wrappers around
   __builtin_neon_vcntv8qi and __builtin_neon_vcntv16qi.  Only 8-bit element
   types are defined; the trailing constant is 1 (signed), 0 (unsigned) or
   4 (polynomial).  */
#define vcnt_s8(__a) \
  (int8x8_t)__builtin_neon_vcntv8qi (__a, 1)
#define vcnt_u8(__a) \
  (uint8x8_t)__builtin_neon_vcntv8qi (__a, 0)
#define vcnt_p8(__a) \
  (poly8x8_t)__builtin_neon_vcntv8qi (__a, 4)
#define vcntq_s8(__a) \
  (int8x16_t)__builtin_neon_vcntv16qi (__a, 1)
#define vcntq_u8(__a) \
  (uint8x16_t)__builtin_neon_vcntv16qi (__a, 0)
#define vcntq_p8(__a) \
  (poly8x16_t)__builtin_neon_vcntv16qi (__a, 4)
/* vrecpe_<type> / vrecpeq_<type> (__a): one-operand wrappers around
   __builtin_neon_vrecpe<mode>, defined for float32 (trailing constant 5)
   and unsigned 32-bit (trailing constant 0) vectors.  */
#define vrecpe_f32(__a) \
  (float32x2_t)__builtin_neon_vrecpev2sf (__a, 5)
#define vrecpe_u32(__a) \
  (uint32x2_t)__builtin_neon_vrecpev2si (__a, 0)
#define vrecpeq_f32(__a) \
  (float32x4_t)__builtin_neon_vrecpev4sf (__a, 5)
#define vrecpeq_u32(__a) \
  (uint32x4_t)__builtin_neon_vrecpev4si (__a, 0)
/* vrsqrte_<type> / vrsqrteq_<type> (__a): one-operand wrappers around
   __builtin_neon_vrsqrte<mode>, defined for float32 (trailing constant 5)
   and unsigned 32-bit (trailing constant 0) vectors.  */
#define vrsqrte_f32(__a) \
  (float32x2_t)__builtin_neon_vrsqrtev2sf (__a, 5)
#define vrsqrte_u32(__a) \
  (uint32x2_t)__builtin_neon_vrsqrtev2si (__a, 0)
#define vrsqrteq_f32(__a) \
  (float32x4_t)__builtin_neon_vrsqrtev4sf (__a, 5)
#define vrsqrteq_u32(__a) \
  (uint32x4_t)__builtin_neon_vrsqrtev4si (__a, 0)
/* vget_lane_<type> (__a, __b): d-register wrappers around
   __builtin_neon_vget_lane<mode>.  Unlike most wrappers in this file the
   result is cast to a scalar element type (int8_t, float32_t, poly8_t,
   ...).  The trailing constant is 1 (signed), 0 (unsigned), 4 (polynomial)
   or 5 (float32).  */
#define vget_lane_s8(__a, __b) \
  (int8_t)__builtin_neon_vget_lanev8qi (__a, __b, 1)
#define vget_lane_s16(__a, __b) \
  (int16_t)__builtin_neon_vget_lanev4hi (__a, __b, 1)
#define vget_lane_s32(__a, __b) \
  (int32_t)__builtin_neon_vget_lanev2si (__a, __b, 1)
#define vget_lane_f32(__a, __b) \
  (float32_t)__builtin_neon_vget_lanev2sf (__a, __b, 5)
#define vget_lane_u8(__a, __b) \
  (uint8_t)__builtin_neon_vget_lanev8qi (__a, __b, 0)
#define vget_lane_u16(__a, __b) \
  (uint16_t)__builtin_neon_vget_lanev4hi (__a, __b, 0)
#define vget_lane_u32(__a, __b) \
  (uint32_t)__builtin_neon_vget_lanev2si (__a, __b, 0)
#define vget_lane_p8(__a, __b) \
  (poly8_t)__builtin_neon_vget_lanev8qi (__a, __b, 4)
#define vget_lane_p16(__a, __b) \
  (poly16_t)__builtin_neon_vget_lanev4hi (__a, __b, 4)
#define vget_lane_s64(__a, __b) \
  (int64_t)__builtin_neon_vget_lanev1di (__a, __b, 1)
#define vget_lane_u64(__a, __b) \
  (uint64_t)__builtin_neon_vget_lanev1di (__a, __b, 0)
/* vgetq_lane_<type> (__a, __b): q-register wrappers around
   __builtin_neon_vget_lane<mode>, returning a scalar element.  The trailing
   constant is 1 (signed), 0 (unsigned), 4 (polynomial) or 5 (float32).  */
#define vgetq_lane_s8(__a, __b) \
  (int8_t)__builtin_neon_vget_lanev16qi (__a, __b, 1)
#define vgetq_lane_s16(__a, __b) \
  (int16_t)__builtin_neon_vget_lanev8hi (__a, __b, 1)
#define vgetq_lane_s32(__a, __b) \
  (int32_t)__builtin_neon_vget_lanev4si (__a, __b, 1)
#define vgetq_lane_f32(__a, __b) \
  (float32_t)__builtin_neon_vget_lanev4sf (__a, __b, 5)
#define vgetq_lane_u8(__a, __b) \
  (uint8_t)__builtin_neon_vget_lanev16qi (__a, __b, 0)
#define vgetq_lane_u16(__a, __b) \
  (uint16_t)__builtin_neon_vget_lanev8hi (__a, __b, 0)
#define vgetq_lane_u32(__a, __b) \
  (uint32_t)__builtin_neon_vget_lanev4si (__a, __b, 0)
#define vgetq_lane_p8(__a, __b) \
  (poly8_t)__builtin_neon_vget_lanev16qi (__a, __b, 4)
#define vgetq_lane_p16(__a, __b) \
  (poly16_t)__builtin_neon_vget_lanev8hi (__a, __b, 4)
#define vgetq_lane_s64(__a, __b) \
  (int64_t)__builtin_neon_vget_lanev2di (__a, __b, 1)
#define vgetq_lane_u64(__a, __b) \
  (uint64_t)__builtin_neon_vget_lanev2di (__a, __b, 0)
/* vset_lane_<type> (__a, __b, __c): d-register wrappers around
   __builtin_neon_vset_lane<mode>.  The three arguments are forwarded in
   order with no variant constant; wrappers for one element width differ
   only in the result cast.  */
#define vset_lane_s8(__a, __b, __c) \
  (int8x8_t)__builtin_neon_vset_lanev8qi (__a, __b, __c)
#define vset_lane_s16(__a, __b, __c) \
  (int16x4_t)__builtin_neon_vset_lanev4hi (__a, __b, __c)
#define vset_lane_s32(__a, __b, __c) \
  (int32x2_t)__builtin_neon_vset_lanev2si (__a, __b, __c)
#define vset_lane_f32(__a, __b, __c) \
  (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c)
#define vset_lane_u8(__a, __b, __c) \
  (uint8x8_t)__builtin_neon_vset_lanev8qi (__a, __b, __c)
#define vset_lane_u16(__a, __b, __c) \
  (uint16x4_t)__builtin_neon_vset_lanev4hi (__a, __b, __c)
#define vset_lane_u32(__a, __b, __c) \
  (uint32x2_t)__builtin_neon_vset_lanev2si (__a, __b, __c)
#define vset_lane_p8(__a, __b, __c) \
  (poly8x8_t)__builtin_neon_vset_lanev8qi (__a, __b, __c)
#define vset_lane_p16(__a, __b, __c) \
  (poly16x4_t)__builtin_neon_vset_lanev4hi (__a, __b, __c)
#define vset_lane_s64(__a, __b, __c) \
  (int64x1_t)__builtin_neon_vset_lanev1di (__a, __b, __c)
#define vset_lane_u64(__a, __b, __c) \
  (uint64x1_t)__builtin_neon_vset_lanev1di (__a, __b, __c)
/* vsetq_lane_<type> (__a, __b, __c): q-register wrappers around
   __builtin_neon_vset_lane<mode>; no variant constant is passed and the
   wrappers for one element width differ only in the result cast.  */
#define vsetq_lane_s8(__a, __b, __c) \
  (int8x16_t)__builtin_neon_vset_lanev16qi (__a, __b, __c)
#define vsetq_lane_s16(__a, __b, __c) \
  (int16x8_t)__builtin_neon_vset_lanev8hi (__a, __b, __c)
#define vsetq_lane_s32(__a, __b, __c) \
  (int32x4_t)__builtin_neon_vset_lanev4si (__a, __b, __c)
#define vsetq_lane_f32(__a, __b, __c) \
  (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c)
#define vsetq_lane_u8(__a, __b, __c) \
  (uint8x16_t)__builtin_neon_vset_lanev16qi (__a, __b, __c)
#define vsetq_lane_u16(__a, __b, __c) \
  (uint16x8_t)__builtin_neon_vset_lanev8hi (__a, __b, __c)
#define vsetq_lane_u32(__a, __b, __c) \
  (uint32x4_t)__builtin_neon_vset_lanev4si (__a, __b, __c)
#define vsetq_lane_p8(__a, __b, __c) \
  (poly8x16_t)__builtin_neon_vset_lanev16qi (__a, __b, __c)
#define vsetq_lane_p16(__a, __b, __c) \
  (poly16x8_t)__builtin_neon_vset_lanev8hi (__a, __b, __c)
#define vsetq_lane_s64(__a, __b, __c) \
  (int64x2_t)__builtin_neon_vset_lanev2di (__a, __b, __c)
#define vsetq_lane_u64(__a, __b, __c) \
  (uint64x2_t)__builtin_neon_vset_lanev2di (__a, __b, __c)
/* vcreate_<type> (__a): wrappers around __builtin_neon_vcreate<mode> that
   pass the single argument through and cast the result to the d-register
   vector type named by the suffix.  No variant constant is passed.  */
#define vcreate_s8(__a) \
  (int8x8_t)__builtin_neon_vcreatev8qi (__a)
#define vcreate_s16(__a) \
  (int16x4_t)__builtin_neon_vcreatev4hi (__a)
#define vcreate_s32(__a) \
  (int32x2_t)__builtin_neon_vcreatev2si (__a)
#define vcreate_s64(__a) \
  (int64x1_t)__builtin_neon_vcreatev1di (__a)
#define vcreate_f32(__a) \
  (float32x2_t)__builtin_neon_vcreatev2sf (__a)
#define vcreate_u8(__a) \
  (uint8x8_t)__builtin_neon_vcreatev8qi (__a)
#define vcreate_u16(__a) \
  (uint16x4_t)__builtin_neon_vcreatev4hi (__a)
#define vcreate_u32(__a) \
  (uint32x2_t)__builtin_neon_vcreatev2si (__a)
#define vcreate_u64(__a) \
  (uint64x1_t)__builtin_neon_vcreatev1di (__a)
#define vcreate_p8(__a) \
  (poly8x8_t)__builtin_neon_vcreatev8qi (__a)
#define vcreate_p16(__a) \
  (poly16x4_t)__builtin_neon_vcreatev4hi (__a)
/* vdup_n_<type> (__a): d-register wrappers around
   __builtin_neon_vdup_n<mode>.  The single argument is forwarded and the
   result cast to the vector type named by the suffix; no variant constant
   is passed.  */
#define vdup_n_s8(__a) \
  (int8x8_t)__builtin_neon_vdup_nv8qi (__a)
#define vdup_n_s16(__a) \
  (int16x4_t)__builtin_neon_vdup_nv4hi (__a)
#define vdup_n_s32(__a) \
  (int32x2_t)__builtin_neon_vdup_nv2si (__a)
#define vdup_n_f32(__a) \
  (float32x2_t)__builtin_neon_vdup_nv2sf (__a)
#define vdup_n_u8(__a) \
  (uint8x8_t)__builtin_neon_vdup_nv8qi (__a)
#define vdup_n_u16(__a) \
  (uint16x4_t)__builtin_neon_vdup_nv4hi (__a)
#define vdup_n_u32(__a) \
  (uint32x2_t)__builtin_neon_vdup_nv2si (__a)
#define vdup_n_p8(__a) \
  (poly8x8_t)__builtin_neon_vdup_nv8qi (__a)
#define vdup_n_p16(__a) \
  (poly16x4_t)__builtin_neon_vdup_nv4hi (__a)
#define vdup_n_s64(__a) \
  (int64x1_t)__builtin_neon_vdup_nv1di (__a)
#define vdup_n_u64(__a) \
  (uint64x1_t)__builtin_neon_vdup_nv1di (__a)
/* vdupq_n_<type> (__a): q-register wrappers around
   __builtin_neon_vdup_n<mode>; the result is cast to the vector type named
   by the suffix and no variant constant is passed.  */
#define vdupq_n_s8(__a) \
  (int8x16_t)__builtin_neon_vdup_nv16qi (__a)
#define vdupq_n_s16(__a) \
  (int16x8_t)__builtin_neon_vdup_nv8hi (__a)
#define vdupq_n_s32(__a) \
  (int32x4_t)__builtin_neon_vdup_nv4si (__a)
#define vdupq_n_f32(__a) \
  (float32x4_t)__builtin_neon_vdup_nv4sf (__a)
#define vdupq_n_u8(__a) \
  (uint8x16_t)__builtin_neon_vdup_nv16qi (__a)
#define vdupq_n_u16(__a) \
  (uint16x8_t)__builtin_neon_vdup_nv8hi (__a)
#define vdupq_n_u32(__a) \
  (uint32x4_t)__builtin_neon_vdup_nv4si (__a)
#define vdupq_n_p8(__a) \
  (poly8x16_t)__builtin_neon_vdup_nv16qi (__a)
#define vdupq_n_p16(__a) \
  (poly16x8_t)__builtin_neon_vdup_nv8hi (__a)
#define vdupq_n_s64(__a) \
  (int64x2_t)__builtin_neon_vdup_nv2di (__a)
#define vdupq_n_u64(__a) \
  (uint64x2_t)__builtin_neon_vdup_nv2di (__a)
/* vmov_n_<type> (__a): d-register wrappers that expand to the same
   __builtin_neon_vdup_n<mode> calls as the vdup_n_* macros; in this file
   they are alternative spellings with identical expansions.  */
#define vmov_n_s8(__a) \
  (int8x8_t)__builtin_neon_vdup_nv8qi (__a)
#define vmov_n_s16(__a) \
  (int16x4_t)__builtin_neon_vdup_nv4hi (__a)
#define vmov_n_s32(__a) \
  (int32x2_t)__builtin_neon_vdup_nv2si (__a)
#define vmov_n_f32(__a) \
  (float32x2_t)__builtin_neon_vdup_nv2sf (__a)
#define vmov_n_u8(__a) \
  (uint8x8_t)__builtin_neon_vdup_nv8qi (__a)
#define vmov_n_u16(__a) \
  (uint16x4_t)__builtin_neon_vdup_nv4hi (__a)
#define vmov_n_u32(__a) \
  (uint32x2_t)__builtin_neon_vdup_nv2si (__a)
#define vmov_n_p8(__a) \
  (poly8x8_t)__builtin_neon_vdup_nv8qi (__a)
#define vmov_n_p16(__a) \
  (poly16x4_t)__builtin_neon_vdup_nv4hi (__a)
#define vmov_n_s64(__a) \
  (int64x1_t)__builtin_neon_vdup_nv1di (__a)
#define vmov_n_u64(__a) \
  (uint64x1_t)__builtin_neon_vdup_nv1di (__a)
/* vmovq_n_<type> (__a): q-register wrappers that expand to the same
   __builtin_neon_vdup_n<mode> calls as the vdupq_n_* macros.  */
#define vmovq_n_s8(__a) \
  (int8x16_t)__builtin_neon_vdup_nv16qi (__a)
#define vmovq_n_s16(__a) \
  (int16x8_t)__builtin_neon_vdup_nv8hi (__a)
#define vmovq_n_s32(__a) \
  (int32x4_t)__builtin_neon_vdup_nv4si (__a)
#define vmovq_n_f32(__a) \
  (float32x4_t)__builtin_neon_vdup_nv4sf (__a)
#define vmovq_n_u8(__a) \
  (uint8x16_t)__builtin_neon_vdup_nv16qi (__a)
#define vmovq_n_u16(__a) \
  (uint16x8_t)__builtin_neon_vdup_nv8hi (__a)
#define vmovq_n_u32(__a) \
  (uint32x4_t)__builtin_neon_vdup_nv4si (__a)
#define vmovq_n_p8(__a) \
  (poly8x16_t)__builtin_neon_vdup_nv16qi (__a)
#define vmovq_n_p16(__a) \
  (poly16x8_t)__builtin_neon_vdup_nv8hi (__a)
#define vmovq_n_s64(__a) \
  (int64x2_t)__builtin_neon_vdup_nv2di (__a)
#define vmovq_n_u64(__a) \
  (uint64x2_t)__builtin_neon_vdup_nv2di (__a)
/* vdup_lane_<type> (__a, __b): d-register wrappers around
   __builtin_neon_vdup_lane<mode>.  Both arguments are forwarded and the
   result cast to the vector type named by the suffix; no variant constant
   is passed.  */
#define vdup_lane_s8(__a, __b) \
  (int8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b)
#define vdup_lane_s16(__a, __b) \
  (int16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b)
#define vdup_lane_s32(__a, __b) \
  (int32x2_t)__builtin_neon_vdup_lanev2si (__a, __b)
#define vdup_lane_f32(__a, __b) \
  (float32x2_t)__builtin_neon_vdup_lanev2sf (__a, __b)
#define vdup_lane_u8(__a, __b) \
  (uint8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b)
#define vdup_lane_u16(__a, __b) \
  (uint16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b)
#define vdup_lane_u32(__a, __b) \
  (uint32x2_t)__builtin_neon_vdup_lanev2si (__a, __b)
#define vdup_lane_p8(__a, __b) \
  (poly8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b)
#define vdup_lane_p16(__a, __b) \
  (poly16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b)
#define vdup_lane_s64(__a, __b) \
  (int64x1_t)__builtin_neon_vdup_lanev1di (__a, __b)
#define vdup_lane_u64(__a, __b) \
  (uint64x1_t)__builtin_neon_vdup_lanev1di (__a, __b)
/* vdupq_lane_<type> (__a, __b): wrappers around
   __builtin_neon_vdup_lane<mode> using the q-register modes, with the
   result cast to the q-register vector type named by the suffix.  No
   variant constant is passed.  */
#define vdupq_lane_s8(__a, __b) \
  (int8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b)
#define vdupq_lane_s16(__a, __b) \
  (int16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b)
#define vdupq_lane_s32(__a, __b) \
  (int32x4_t)__builtin_neon_vdup_lanev4si (__a, __b)
#define vdupq_lane_f32(__a, __b) \
  (float32x4_t)__builtin_neon_vdup_lanev4sf (__a, __b)
#define vdupq_lane_u8(__a, __b) \
  (uint8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b)
#define vdupq_lane_u16(__a, __b) \
  (uint16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b)
#define vdupq_lane_u32(__a, __b) \
  (uint32x4_t)__builtin_neon_vdup_lanev4si (__a, __b)
#define vdupq_lane_p8(__a, __b) \
  (poly8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b)
#define vdupq_lane_p16(__a, __b) \
  (poly16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b)
#define vdupq_lane_s64(__a, __b) \
  (int64x2_t)__builtin_neon_vdup_lanev2di (__a, __b)
#define vdupq_lane_u64(__a, __b) \
  (uint64x2_t)__builtin_neon_vdup_lanev2di (__a, __b)
/* vcombine_<type> (__a, __b): wrappers around
   __builtin_neon_vcombine<mode>.  The builtin is named after the d-register
   mode of the operands (v8qi, v4hi, ...) while the result is cast to the
   q-register type with twice as many lanes.  No variant constant is
   passed.  */
#define vcombine_s8(__a, __b) \
  (int8x16_t)__builtin_neon_vcombinev8qi (__a, __b)
#define vcombine_s16(__a, __b) \
  (int16x8_t)__builtin_neon_vcombinev4hi (__a, __b)
#define vcombine_s32(__a, __b) \
  (int32x4_t)__builtin_neon_vcombinev2si (__a, __b)
#define vcombine_s64(__a, __b) \
  (int64x2_t)__builtin_neon_vcombinev1di (__a, __b)
#define vcombine_f32(__a, __b) \
  (float32x4_t)__builtin_neon_vcombinev2sf (__a, __b)
#define vcombine_u8(__a, __b) \
  (uint8x16_t)__builtin_neon_vcombinev8qi (__a, __b)
#define vcombine_u16(__a, __b) \
  (uint16x8_t)__builtin_neon_vcombinev4hi (__a, __b)
#define vcombine_u32(__a, __b) \
  (uint32x4_t)__builtin_neon_vcombinev2si (__a, __b)
#define vcombine_u64(__a, __b) \
  (uint64x2_t)__builtin_neon_vcombinev1di (__a, __b)
#define vcombine_p8(__a, __b) \
  (poly8x16_t)__builtin_neon_vcombinev8qi (__a, __b)
#define vcombine_p16(__a, __b) \
  (poly16x8_t)__builtin_neon_vcombinev4hi (__a, __b)
/* vget_high_<type> (__a): wrappers around __builtin_neon_vget_high<mode>.
   The builtin is named after the q-register mode of the operand while the
   result is cast to the d-register type with half as many lanes.  No
   variant constant is passed.  */
#define vget_high_s8(__a) \
  (int8x8_t)__builtin_neon_vget_highv16qi (__a)
#define vget_high_s16(__a) \
  (int16x4_t)__builtin_neon_vget_highv8hi (__a)
#define vget_high_s32(__a) \
  (int32x2_t)__builtin_neon_vget_highv4si (__a)
#define vget_high_s64(__a) \
  (int64x1_t)__builtin_neon_vget_highv2di (__a)
#define vget_high_f32(__a) \
  (float32x2_t)__builtin_neon_vget_highv4sf (__a)
#define vget_high_u8(__a) \
  (uint8x8_t)__builtin_neon_vget_highv16qi (__a)
#define vget_high_u16(__a) \
  (uint16x4_t)__builtin_neon_vget_highv8hi (__a)
#define vget_high_u32(__a) \
  (uint32x2_t)__builtin_neon_vget_highv4si (__a)
#define vget_high_u64(__a) \
  (uint64x1_t)__builtin_neon_vget_highv2di (__a)
#define vget_high_p8(__a) \
  (poly8x8_t)__builtin_neon_vget_highv16qi (__a)
#define vget_high_p16(__a) \
  (poly16x4_t)__builtin_neon_vget_highv8hi (__a)
/* vget_low_<type> (__a): wrappers around __builtin_neon_vget_low<mode>,
   the counterpart of vget_high_*.  The operand mode is the q-register one
   and the result is cast to the d-register type with half as many lanes.
   No variant constant is passed.  */
#define vget_low_s8(__a) \
  (int8x8_t)__builtin_neon_vget_lowv16qi (__a)
#define vget_low_s16(__a) \
  (int16x4_t)__builtin_neon_vget_lowv8hi (__a)
#define vget_low_s32(__a) \
  (int32x2_t)__builtin_neon_vget_lowv4si (__a)
#define vget_low_s64(__a) \
  (int64x1_t)__builtin_neon_vget_lowv2di (__a)
#define vget_low_f32(__a) \
  (float32x2_t)__builtin_neon_vget_lowv4sf (__a)
#define vget_low_u8(__a) \
  (uint8x8_t)__builtin_neon_vget_lowv16qi (__a)
#define vget_low_u16(__a) \
  (uint16x4_t)__builtin_neon_vget_lowv8hi (__a)
#define vget_low_u32(__a) \
  (uint32x2_t)__builtin_neon_vget_lowv4si (__a)
#define vget_low_u64(__a) \
  (uint64x1_t)__builtin_neon_vget_lowv2di (__a)
#define vget_low_p8(__a) \
  (poly8x8_t)__builtin_neon_vget_lowv16qi (__a)
#define vget_low_p16(__a) \
  (poly16x4_t)__builtin_neon_vget_lowv8hi (__a)
/* vcvt_<to>_<from> / vcvtq_<to>_<from> (__a): conversion wrappers around
   __builtin_neon_vcvt<mode>.  The builtin's mode names the source type
   (v2sf/v4sf when converting from float32, v2si/v4si when converting from
   an integer vector).  The trailing constant is 1 when the integer side is
   signed and 0 when it is unsigned.  */
#define vcvt_s32_f32(__a) \
  (int32x2_t)__builtin_neon_vcvtv2sf (__a, 1)
#define vcvt_f32_s32(__a) \
  (float32x2_t)__builtin_neon_vcvtv2si (__a, 1)
#define vcvt_f32_u32(__a) \
  (float32x2_t)__builtin_neon_vcvtv2si (__a, 0)
#define vcvt_u32_f32(__a) \
  (uint32x2_t)__builtin_neon_vcvtv2sf (__a, 0)
#define vcvtq_s32_f32(__a) \
  (int32x4_t)__builtin_neon_vcvtv4sf (__a, 1)
#define vcvtq_f32_s32(__a) \
  (float32x4_t)__builtin_neon_vcvtv4si (__a, 1)
#define vcvtq_f32_u32(__a) \
  (float32x4_t)__builtin_neon_vcvtv4si (__a, 0)
#define vcvtq_u32_f32(__a) \
  (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0)
/* vcvt_n_<to>_<from> / vcvtq_n_<to>_<from> (__a, __b): two-operand
   conversion wrappers around __builtin_neon_vcvt_n<mode>.  The builtin's
   mode names the source type; the trailing constant is 1 when the integer
   side is signed and 0 when it is unsigned.  */
#define vcvt_n_s32_f32(__a, __b) \
  (int32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 1)
#define vcvt_n_f32_s32(__a, __b) \
  (float32x2_t)__builtin_neon_vcvt_nv2si (__a, __b, 1)
#define vcvt_n_f32_u32(__a, __b) \
  (float32x2_t)__builtin_neon_vcvt_nv2si (__a, __b, 0)
#define vcvt_n_u32_f32(__a, __b) \
  (uint32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 0)
#define vcvtq_n_s32_f32(__a, __b) \
  (int32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 1)
#define vcvtq_n_f32_s32(__a, __b) \
  (float32x4_t)__builtin_neon_vcvt_nv4si (__a, __b, 1)
#define vcvtq_n_f32_u32(__a, __b) \
  (float32x4_t)__builtin_neon_vcvt_nv4si (__a, __b, 0)
#define vcvtq_n_u32_f32(__a, __b) \
  (uint32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 0)
/* vmovn_<type> (__a): wrappers around __builtin_neon_vmovn<mode>.  The
   suffix and builtin mode name the q-register source; the result is cast
   to a d-register vector with the same lane count and half the element
   width.  Trailing constant: 1 signed, 0 unsigned.  */
#define vmovn_s16(__a) \
  (int8x8_t)__builtin_neon_vmovnv8hi (__a, 1)
#define vmovn_s32(__a) \
  (int16x4_t)__builtin_neon_vmovnv4si (__a, 1)
#define vmovn_s64(__a) \
  (int32x2_t)__builtin_neon_vmovnv2di (__a, 1)
#define vmovn_u16(__a) \
  (uint8x8_t)__builtin_neon_vmovnv8hi (__a, 0)
#define vmovn_u32(__a) \
  (uint16x4_t)__builtin_neon_vmovnv4si (__a, 0)
#define vmovn_u64(__a) \
  (uint32x2_t)__builtin_neon_vmovnv2di (__a, 0)
/* vqmovn_<type> (__a): wrappers around __builtin_neon_vqmovn<mode>, with
   the same source/result type pairing as vmovn_*.  Trailing constant: 1
   signed, 0 unsigned.  */
#define vqmovn_s16(__a) \
  (int8x8_t)__builtin_neon_vqmovnv8hi (__a, 1)
#define vqmovn_s32(__a) \
  (int16x4_t)__builtin_neon_vqmovnv4si (__a, 1)
#define vqmovn_s64(__a) \
  (int32x2_t)__builtin_neon_vqmovnv2di (__a, 1)
#define vqmovn_u16(__a) \
  (uint8x8_t)__builtin_neon_vqmovnv8hi (__a, 0)
#define vqmovn_u32(__a) \
  (uint16x4_t)__builtin_neon_vqmovnv4si (__a, 0)
#define vqmovn_u64(__a) \
  (uint32x2_t)__builtin_neon_vqmovnv2di (__a, 0)
/* vqmovun_s<N> (__a): wrappers around __builtin_neon_vqmovun<mode>.  Only
   signed-suffixed names exist, the result is cast to an unsigned
   d-register vector of half the element width, and the trailing constant
   is always 1.  */
#define vqmovun_s16(__a) \
  (uint8x8_t)__builtin_neon_vqmovunv8hi (__a, 1)
#define vqmovun_s32(__a) \
  (uint16x4_t)__builtin_neon_vqmovunv4si (__a, 1)
#define vqmovun_s64(__a) \
  (uint32x2_t)__builtin_neon_vqmovunv2di (__a, 1)
/* vmovl_<type> (__a): wrappers around __builtin_neon_vmovl<mode>.  The
   suffix and builtin mode name the d-register source; the result is cast
   to a q-register vector with the same lane count and twice the element
   width.  Trailing constant: 1 signed, 0 unsigned.  */
#define vmovl_s8(__a) \
  (int16x8_t)__builtin_neon_vmovlv8qi (__a, 1)
#define vmovl_s16(__a) \
  (int32x4_t)__builtin_neon_vmovlv4hi (__a, 1)
#define vmovl_s32(__a) \
  (int64x2_t)__builtin_neon_vmovlv2si (__a, 1)
#define vmovl_u8(__a) \
  (uint16x8_t)__builtin_neon_vmovlv8qi (__a, 0)
#define vmovl_u16(__a) \
  (uint32x4_t)__builtin_neon_vmovlv4hi (__a, 0)
#define vmovl_u32(__a) \
  (uint64x2_t)__builtin_neon_vmovlv2si (__a, 0)
/* vtbl1_<type> (__a, __b): wrappers around __builtin_neon_vtbl1v8qi.  The
   signed, unsigned and polynomial forms share the one builtin and differ
   only in the result cast.  */
#define vtbl1_s8(__a, __b) \
  (int8x8_t)__builtin_neon_vtbl1v8qi (__a, __b)
#define vtbl1_u8(__a, __b) \
  (uint8x8_t)__builtin_neon_vtbl1v8qi (__a, __b)
#define vtbl1_p8(__a, __b) \
  (poly8x8_t)__builtin_neon_vtbl1v8qi (__a, __b)
/* vtbl2/vtbl3/vtbl4_<type> (__a, __b): wrappers around
   __builtin_neon_vtbl{2,3,4}v8qi.  __a is a multi-vector struct
   (<type>x8x2_t, x8x3_t or x8x4_t).  Each macro is a GNU statement
   expression: it copies __a into the first member of a local union and
   reads the union back through its __builtin_neon_v8qi{2,3,4} member, which
   is the type passed to the builtin.  The value of the statement expression
   is the builtin's result cast to the d-register type named by the
   suffix.  */
#define vtbl2_s8(__a, __b) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __au = { __a }; \
     (int8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); \
   })
#define vtbl2_u8(__a, __b) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __au = { __a }; \
     (uint8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); \
   })
#define vtbl2_p8(__a, __b) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __au = { __a }; \
     (poly8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); \
   })
#define vtbl3_s8(__a, __b) \
  ({ \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __au = { __a }; \
     (int8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); \
   })
#define vtbl3_u8(__a, __b) \
  ({ \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __au = { __a }; \
     (uint8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); \
   })
#define vtbl3_p8(__a, __b) \
  ({ \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __au = { __a }; \
     (poly8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); \
   })
#define vtbl4_s8(__a, __b) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __au = { __a }; \
     (int8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); \
   })
#define vtbl4_u8(__a, __b) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __au = { __a }; \
     (uint8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); \
   })
#define vtbl4_p8(__a, __b) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __au = { __a }; \
     (poly8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); \
   })
/* vtbx1_<type>: pass (__a, __b, __c) unchanged to
   __builtin_neon_vtbx1v8qi and cast the result to the variant's 8-lane
   vector type.  The three variants differ only in that cast.  The
   expansion is parenthesized as a whole so it behaves as one operand.  */
#define vtbx1_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c))

#define vtbx1_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c))

#define vtbx1_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c))
/* vtbxN_<type> for N = 2, 3, 4.  __b is a multi-vector struct
   (<type>8x8xN_t).  Each macro copy-initializes the first member (__i)
   of a local union with __b and reads it back through the second member
   (__o), whose type __builtin_neon_v8qiN is what the builtin takes.
   __a and __c are forwarded unchanged around it, and the builtin result
   is cast to the variant's 8-lane vector type.
   The body is a GNU statement expression: its value is the last
   expression, and it must be closed with "})" before the macro ends.  */
#define vtbx2_s8(__a, __b, __c) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     (int8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c); \
   })

#define vtbx2_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     (uint8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c); \
   })

#define vtbx2_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     (poly8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c); \
   })

#define vtbx3_s8(__a, __b, __c) \
  ({ \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     (int8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); \
   })

#define vtbx3_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     (uint8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); \
   })

#define vtbx3_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     (poly8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); \
   })

#define vtbx4_s8(__a, __b, __c) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     (int8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); \
   })

#define vtbx4_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     (uint8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); \
   })

#define vtbx4_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     (poly8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); \
   })
/* vmul_lane_<type> / vmulq_lane_<type>: forward (__a, __b, __c) to the
   __builtin_neon_vmul_lane<mode> builtin and cast the result to the
   variant's vector type.  The trailing constant is 1 for signed, 0 for
   unsigned and 5 for float32 variants.  NOTE(review): presumably a
   type selector read by the builtin -- confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmul_lane_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c, 1))

#define vmul_lane_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c, 1))

#define vmul_lane_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmul_lanev2sf (__a, __b, __c, 5))

#define vmul_lane_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c, 0))

#define vmul_lane_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c, 0))

#define vmulq_lane_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c, 1))

#define vmulq_lane_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c, 1))

#define vmulq_lane_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmul_lanev4sf (__a, __b, __c, 5))

#define vmulq_lane_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c, 0))

#define vmulq_lane_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c, 0))
/* vmla_lane_<type> / vmlaq_lane_<type>: forward (__a, __b, __c, __d) to
   the __builtin_neon_vmla_lane<mode> builtin and cast the result to the
   variant's vector type.  The trailing constant is 1 for signed, 0 for
   unsigned and 5 for float32 variants.  NOTE(review): presumably a
   type selector read by the builtin -- confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmla_lane_s16(__a, __b, __c, __d) \
  ((int16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d, 1))

#define vmla_lane_s32(__a, __b, __c, __d) \
  ((int32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d, 1))

#define vmla_lane_f32(__a, __b, __c, __d) \
  ((float32x2_t)__builtin_neon_vmla_lanev2sf (__a, __b, __c, __d, 5))

#define vmla_lane_u16(__a, __b, __c, __d) \
  ((uint16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d, 0))

#define vmla_lane_u32(__a, __b, __c, __d) \
  ((uint32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d, 0))

#define vmlaq_lane_s16(__a, __b, __c, __d) \
  ((int16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d, 1))

#define vmlaq_lane_s32(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d, 1))

#define vmlaq_lane_f32(__a, __b, __c, __d) \
  ((float32x4_t)__builtin_neon_vmla_lanev4sf (__a, __b, __c, __d, 5))

#define vmlaq_lane_u16(__a, __b, __c, __d) \
  ((uint16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d, 0))

#define vmlaq_lane_u32(__a, __b, __c, __d) \
  ((uint32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d, 0))
/* vmlal_lane_<type> and vqdmlal_lane_<type>: forward
   (__a, __b, __c, __d) to __builtin_neon_vmlal_lane<mode> or
   __builtin_neon_vqdmlal_lane<mode> and cast the result to the
   variant's 128-bit vector type.  The trailing constant is 1 for signed
   and 0 for unsigned variants.  NOTE(review): presumably a signedness
   selector -- confirm.  Only signed vqdmlal variants are defined here.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmlal_lane_s16(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vmlal_lanev4hi (__a, __b, __c, __d, 1))

#define vmlal_lane_s32(__a, __b, __c, __d) \
  ((int64x2_t)__builtin_neon_vmlal_lanev2si (__a, __b, __c, __d, 1))

#define vmlal_lane_u16(__a, __b, __c, __d) \
  ((uint32x4_t)__builtin_neon_vmlal_lanev4hi (__a, __b, __c, __d, 0))

#define vmlal_lane_u32(__a, __b, __c, __d) \
  ((uint64x2_t)__builtin_neon_vmlal_lanev2si (__a, __b, __c, __d, 0))

#define vqdmlal_lane_s16(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vqdmlal_lanev4hi (__a, __b, __c, __d, 1))

#define vqdmlal_lane_s32(__a, __b, __c, __d) \
  ((int64x2_t)__builtin_neon_vqdmlal_lanev2si (__a, __b, __c, __d, 1))
/* vmls_lane_<type> / vmlsq_lane_<type>: forward (__a, __b, __c, __d) to
   the __builtin_neon_vmls_lane<mode> builtin and cast the result to the
   variant's vector type.  The trailing constant is 1 for signed, 0 for
   unsigned and 5 for float32 variants.  NOTE(review): presumably a
   type selector read by the builtin -- confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmls_lane_s16(__a, __b, __c, __d) \
  ((int16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d, 1))

#define vmls_lane_s32(__a, __b, __c, __d) \
  ((int32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d, 1))

#define vmls_lane_f32(__a, __b, __c, __d) \
  ((float32x2_t)__builtin_neon_vmls_lanev2sf (__a, __b, __c, __d, 5))

#define vmls_lane_u16(__a, __b, __c, __d) \
  ((uint16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d, 0))

#define vmls_lane_u32(__a, __b, __c, __d) \
  ((uint32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d, 0))

#define vmlsq_lane_s16(__a, __b, __c, __d) \
  ((int16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d, 1))

#define vmlsq_lane_s32(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d, 1))

#define vmlsq_lane_f32(__a, __b, __c, __d) \
  ((float32x4_t)__builtin_neon_vmls_lanev4sf (__a, __b, __c, __d, 5))

#define vmlsq_lane_u16(__a, __b, __c, __d) \
  ((uint16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d, 0))

#define vmlsq_lane_u32(__a, __b, __c, __d) \
  ((uint32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d, 0))
/* vmlsl_lane_<type> and vqdmlsl_lane_<type>: forward
   (__a, __b, __c, __d) to __builtin_neon_vmlsl_lane<mode> or
   __builtin_neon_vqdmlsl_lane<mode> and cast the result to the
   variant's 128-bit vector type.  The trailing constant is 1 for signed
   and 0 for unsigned variants.  NOTE(review): presumably a signedness
   selector -- confirm.  Only signed vqdmlsl variants are defined here.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmlsl_lane_s16(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vmlsl_lanev4hi (__a, __b, __c, __d, 1))

#define vmlsl_lane_s32(__a, __b, __c, __d) \
  ((int64x2_t)__builtin_neon_vmlsl_lanev2si (__a, __b, __c, __d, 1))

#define vmlsl_lane_u16(__a, __b, __c, __d) \
  ((uint32x4_t)__builtin_neon_vmlsl_lanev4hi (__a, __b, __c, __d, 0))

#define vmlsl_lane_u32(__a, __b, __c, __d) \
  ((uint64x2_t)__builtin_neon_vmlsl_lanev2si (__a, __b, __c, __d, 0))

#define vqdmlsl_lane_s16(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vqdmlsl_lanev4hi (__a, __b, __c, __d, 1))

#define vqdmlsl_lane_s32(__a, __b, __c, __d) \
  ((int64x2_t)__builtin_neon_vqdmlsl_lanev2si (__a, __b, __c, __d, 1))
/* vmull_lane_<type> and vqdmull_lane_<type>: forward (__a, __b, __c) to
   __builtin_neon_vmull_lane<mode> or __builtin_neon_vqdmull_lane<mode>
   and cast the result to the variant's 128-bit vector type.  The
   trailing constant is 1 for signed and 0 for unsigned variants.
   NOTE(review): presumably a signedness selector -- confirm.  Only
   signed vqdmull variants are defined here.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmull_lane_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmull_lanev4hi (__a, __b, __c, 1))

#define vmull_lane_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmull_lanev2si (__a, __b, __c, 1))

#define vmull_lane_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmull_lanev4hi (__a, __b, __c, 0))

#define vmull_lane_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmull_lanev2si (__a, __b, __c, 0))

#define vqdmull_lane_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmull_lanev4hi (__a, __b, __c, 1))

#define vqdmull_lane_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmull_lanev2si (__a, __b, __c, 1))
/* vqdmulh(q)_lane_<type> and vqrdmulh(q)_lane_<type>: both families
   forward (__a, __b, __c) to the same __builtin_neon_vqdmulh_lane<mode>
   builtin and cast the result to the variant's vector type.  They
   differ only in the trailing constant: 1 for vqdmulh*, 3 for
   vqrdmulh*.  NOTE(review): presumably 3 is the signed selector plus a
   rounding flag -- confirm against the builtin definitions.
   Expansions are fully parenthesized so each is a single operand.  */
#define vqdmulhq_lane_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 1))

#define vqdmulhq_lane_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 1))

#define vqdmulh_lane_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 1))

#define vqdmulh_lane_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 1))

#define vqrdmulhq_lane_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 3))

#define vqrdmulhq_lane_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 3))

#define vqrdmulh_lane_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 3))

#define vqrdmulh_lane_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 3))
/* vmul_n_<type> / vmulq_n_<type>: forward (__a, __b) to the
   __builtin_neon_vmul_n<mode> builtin and cast the result to the
   variant's vector type.  The trailing constant is 1 for signed, 0 for
   unsigned and 5 for float32 variants.  NOTE(review): presumably a
   type selector read by the builtin -- confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmul_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vmul_nv4hi (__a, __b, 1))

#define vmul_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vmul_nv2si (__a, __b, 1))

#define vmul_n_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 5))

#define vmul_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vmul_nv4hi (__a, __b, 0))

#define vmul_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vmul_nv2si (__a, __b, 0))

#define vmulq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vmul_nv8hi (__a, __b, 1))

#define vmulq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vmul_nv4si (__a, __b, 1))

#define vmulq_n_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 5))

#define vmulq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vmul_nv8hi (__a, __b, 0))

#define vmulq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmul_nv4si (__a, __b, 0))
/* vmull_n_<type> and vqdmull_n_<type>: forward (__a, __b) to
   __builtin_neon_vmull_n<mode> or __builtin_neon_vqdmull_n<mode> and
   cast the result to the variant's 128-bit vector type.  The trailing
   constant is 1 for signed and 0 for unsigned variants.
   NOTE(review): presumably a signedness selector -- confirm.  Only
   signed vqdmull variants are defined here.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmull_n_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vmull_nv4hi (__a, __b, 1))

#define vmull_n_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vmull_nv2si (__a, __b, 1))

#define vmull_n_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmull_nv4hi (__a, __b, 0))

#define vmull_n_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vmull_nv2si (__a, __b, 0))

#define vqdmull_n_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmull_nv4hi (__a, __b, 1))

#define vqdmull_n_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vqdmull_nv2si (__a, __b, 1))
/* vqdmulh(q)_n_<type> and vqrdmulh(q)_n_<type>: both families forward
   (__a, __b) to the same __builtin_neon_vqdmulh_n<mode> builtin and
   cast the result to the variant's vector type.  They differ only in
   the trailing constant: 1 for vqdmulh*, 3 for vqrdmulh*.
   NOTE(review): presumably 3 is the signed selector plus a rounding
   flag -- confirm against the builtin definitions.
   Expansions are fully parenthesized so each is a single operand.  */
#define vqdmulhq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, __b, 1))

#define vqdmulhq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, __b, 1))

#define vqdmulh_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, __b, 1))

#define vqdmulh_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, __b, 1))

#define vqrdmulhq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, __b, 3))

#define vqrdmulhq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, __b, 3))

#define vqrdmulh_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, __b, 3))

#define vqrdmulh_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, __b, 3))
/* vmla_n_<type> / vmlaq_n_<type>: forward (__a, __b, __c) to the
   __builtin_neon_vmla_n<mode> builtin and cast the result to the
   variant's vector type.  The trailing constant is 1 for signed, 0 for
   unsigned and 5 for float32 variants.  NOTE(review): presumably a
   type selector read by the builtin -- confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmla_n_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, __c, 1))

#define vmla_n_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmla_nv2si (__a, __b, __c, 1))

#define vmla_n_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 5))

#define vmla_n_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, __c, 0))

#define vmla_n_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmla_nv2si (__a, __b, __c, 0))

#define vmlaq_n_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, __c, 1))

#define vmlaq_n_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmla_nv4si (__a, __b, __c, 1))

#define vmlaq_n_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 5))

#define vmlaq_n_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, __c, 0))

#define vmlaq_n_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmla_nv4si (__a, __b, __c, 0))
/* vmlal_n_<type> and vqdmlal_n_<type>: forward (__a, __b, __c) to
   __builtin_neon_vmlal_n<mode> or __builtin_neon_vqdmlal_n<mode> and
   cast the result to the variant's 128-bit vector type.  The trailing
   constant is 1 for signed and 0 for unsigned variants.
   NOTE(review): presumably a signedness selector -- confirm.  Only
   signed vqdmlal variants are defined here.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmlal_n_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlal_nv4hi (__a, __b, __c, 1))

#define vmlal_n_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmlal_nv2si (__a, __b, __c, 1))

#define vmlal_n_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlal_nv4hi (__a, __b, __c, 0))

#define vmlal_n_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmlal_nv2si (__a, __b, __c, 0))

#define vqdmlal_n_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmlal_nv4hi (__a, __b, __c, 1))

#define vqdmlal_n_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmlal_nv2si (__a, __b, __c, 1))
/* vmls_n_<type> / vmlsq_n_<type>: forward (__a, __b, __c) to the
   __builtin_neon_vmls_n<mode> builtin and cast the result to the
   variant's vector type.  The trailing constant is 1 for signed, 0 for
   unsigned and 5 for float32 variants.  NOTE(review): presumably a
   type selector read by the builtin -- confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmls_n_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, __c, 1))

#define vmls_n_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmls_nv2si (__a, __b, __c, 1))

#define vmls_n_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 5))

#define vmls_n_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, __c, 0))

#define vmls_n_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmls_nv2si (__a, __b, __c, 0))

#define vmlsq_n_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, __c, 1))

#define vmlsq_n_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmls_nv4si (__a, __b, __c, 1))

#define vmlsq_n_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 5))

#define vmlsq_n_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, __c, 0))

#define vmlsq_n_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmls_nv4si (__a, __b, __c, 0))
/* vmlsl_n_<type> and vqdmlsl_n_<type>: forward (__a, __b, __c) to
   __builtin_neon_vmlsl_n<mode> or __builtin_neon_vqdmlsl_n<mode> and
   cast the result to the variant's 128-bit vector type.  The trailing
   constant is 1 for signed and 0 for unsigned variants.
   NOTE(review): presumably a signedness selector -- confirm.  Only
   signed vqdmlsl variants are defined here.
   Expansions are fully parenthesized so each is a single operand.  */
#define vmlsl_n_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlsl_nv4hi (__a, __b, __c, 1))

#define vmlsl_n_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmlsl_nv2si (__a, __b, __c, 1))

#define vmlsl_n_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlsl_nv4hi (__a, __b, __c, 0))

#define vmlsl_n_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmlsl_nv2si (__a, __b, __c, 0))

#define vqdmlsl_n_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmlsl_nv4hi (__a, __b, __c, 1))

#define vqdmlsl_n_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, __c, 1))
/* vext_<type> (64-bit vector variants): forward (__a, __b, __c) to the
   __builtin_neon_vext<mode> builtin and cast the result to the
   variant's vector type.  No extra selector constant is passed;
   variants with the same element layout share one builtin and differ
   only in the result cast.
   Expansions are fully parenthesized so each is a single operand.  */
#define vext_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vextv8qi (__a, __b, __c))

#define vext_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vextv4hi (__a, __b, __c))

#define vext_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vextv2si (__a, __b, __c))

#define vext_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vextv1di (__a, __b, __c))

#define vext_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vextv2sf (__a, __b, __c))

#define vext_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vextv8qi (__a, __b, __c))

#define vext_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vextv4hi (__a, __b, __c))

#define vext_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vextv2si (__a, __b, __c))

#define vext_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vextv1di (__a, __b, __c))

#define vext_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vextv8qi (__a, __b, __c))

#define vext_p16(__a, __b, __c) \
  ((poly16x4_t)__builtin_neon_vextv4hi (__a, __b, __c))
/* vextq_<type> (128-bit vector variants): forward (__a, __b, __c) to
   the __builtin_neon_vext<mode> builtin and cast the result to the
   variant's vector type.  No extra selector constant is passed;
   variants with the same element layout share one builtin and differ
   only in the result cast.
   Expansions are fully parenthesized so each is a single operand.  */
#define vextq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vextv16qi (__a, __b, __c))

#define vextq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vextv8hi (__a, __b, __c))

#define vextq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vextv4si (__a, __b, __c))

#define vextq_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vextv2di (__a, __b, __c))

#define vextq_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vextv4sf (__a, __b, __c))

#define vextq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vextv16qi (__a, __b, __c))

#define vextq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vextv8hi (__a, __b, __c))

#define vextq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vextv4si (__a, __b, __c))

#define vextq_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vextv2di (__a, __b, __c))

#define vextq_p8(__a, __b, __c) \
  ((poly8x16_t)__builtin_neon_vextv16qi (__a, __b, __c))

#define vextq_p16(__a, __b, __c) \
  ((poly16x8_t)__builtin_neon_vextv8hi (__a, __b, __c))
/* vrev64_<type> (64-bit vector variants): forward __a to the
   __builtin_neon_vrev64<mode> builtin and cast the result to the
   variant's vector type.  The trailing constant is 1 for signed, 0 for
   unsigned, 5 for float32 and 4 for polynomial variants.
   NOTE(review): presumably a type selector read by the builtin --
   confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vrev64_s8(__a) \
  ((int8x8_t)__builtin_neon_vrev64v8qi (__a, 1))

#define vrev64_s16(__a) \
  ((int16x4_t)__builtin_neon_vrev64v4hi (__a, 1))

#define vrev64_s32(__a) \
  ((int32x2_t)__builtin_neon_vrev64v2si (__a, 1))

#define vrev64_f32(__a) \
  ((float32x2_t)__builtin_neon_vrev64v2sf (__a, 5))

#define vrev64_u8(__a) \
  ((uint8x8_t)__builtin_neon_vrev64v8qi (__a, 0))

#define vrev64_u16(__a) \
  ((uint16x4_t)__builtin_neon_vrev64v4hi (__a, 0))

#define vrev64_u32(__a) \
  ((uint32x2_t)__builtin_neon_vrev64v2si (__a, 0))

#define vrev64_p8(__a) \
  ((poly8x8_t)__builtin_neon_vrev64v8qi (__a, 4))

#define vrev64_p16(__a) \
  ((poly16x4_t)__builtin_neon_vrev64v4hi (__a, 4))
/* vrev64q_<type> (128-bit vector variants): forward __a to the
   __builtin_neon_vrev64<mode> builtin and cast the result to the
   variant's vector type.  The trailing constant is 1 for signed, 0 for
   unsigned, 5 for float32 and 4 for polynomial variants.
   NOTE(review): presumably a type selector read by the builtin --
   confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vrev64q_s8(__a) \
  ((int8x16_t)__builtin_neon_vrev64v16qi (__a, 1))

#define vrev64q_s16(__a) \
  ((int16x8_t)__builtin_neon_vrev64v8hi (__a, 1))

#define vrev64q_s32(__a) \
  ((int32x4_t)__builtin_neon_vrev64v4si (__a, 1))

#define vrev64q_f32(__a) \
  ((float32x4_t)__builtin_neon_vrev64v4sf (__a, 5))

#define vrev64q_u8(__a) \
  ((uint8x16_t)__builtin_neon_vrev64v16qi (__a, 0))

#define vrev64q_u16(__a) \
  ((uint16x8_t)__builtin_neon_vrev64v8hi (__a, 0))

#define vrev64q_u32(__a) \
  ((uint32x4_t)__builtin_neon_vrev64v4si (__a, 0))

#define vrev64q_p8(__a) \
  ((poly8x16_t)__builtin_neon_vrev64v16qi (__a, 4))

#define vrev64q_p16(__a) \
  ((poly16x8_t)__builtin_neon_vrev64v8hi (__a, 4))
/* vrev32_<type> / vrev32q_<type>: forward __a to the
   __builtin_neon_vrev32<mode> builtin and cast the result to the
   variant's vector type.  Only 8- and 16-bit element variants are
   defined.  The trailing constant is 1 for signed, 0 for unsigned and
   4 for polynomial variants.  NOTE(review): presumably a type selector
   read by the builtin -- confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vrev32_s8(__a) \
  ((int8x8_t)__builtin_neon_vrev32v8qi (__a, 1))

#define vrev32_s16(__a) \
  ((int16x4_t)__builtin_neon_vrev32v4hi (__a, 1))

#define vrev32_u8(__a) \
  ((uint8x8_t)__builtin_neon_vrev32v8qi (__a, 0))

#define vrev32_u16(__a) \
  ((uint16x4_t)__builtin_neon_vrev32v4hi (__a, 0))

#define vrev32_p8(__a) \
  ((poly8x8_t)__builtin_neon_vrev32v8qi (__a, 4))

#define vrev32_p16(__a) \
  ((poly16x4_t)__builtin_neon_vrev32v4hi (__a, 4))

#define vrev32q_s8(__a) \
  ((int8x16_t)__builtin_neon_vrev32v16qi (__a, 1))

#define vrev32q_s16(__a) \
  ((int16x8_t)__builtin_neon_vrev32v8hi (__a, 1))

#define vrev32q_u8(__a) \
  ((uint8x16_t)__builtin_neon_vrev32v16qi (__a, 0))

#define vrev32q_u16(__a) \
  ((uint16x8_t)__builtin_neon_vrev32v8hi (__a, 0))

#define vrev32q_p8(__a) \
  ((poly8x16_t)__builtin_neon_vrev32v16qi (__a, 4))

#define vrev32q_p16(__a) \
  ((poly16x8_t)__builtin_neon_vrev32v8hi (__a, 4))
/* vrev16_<type> / vrev16q_<type>: forward __a to
   __builtin_neon_vrev16v8qi or __builtin_neon_vrev16v16qi and cast the
   result to the variant's vector type.  Only 8-bit element variants
   are defined.  The trailing constant is 1 for signed, 0 for unsigned
   and 4 for polynomial variants.  NOTE(review): presumably a type
   selector read by the builtin -- confirm.
   Expansions are fully parenthesized so each is a single operand.  */
#define vrev16_s8(__a) \
  ((int8x8_t)__builtin_neon_vrev16v8qi (__a, 1))

#define vrev16_u8(__a) \
  ((uint8x8_t)__builtin_neon_vrev16v8qi (__a, 0))

#define vrev16_p8(__a) \
  ((poly8x8_t)__builtin_neon_vrev16v8qi (__a, 4))

#define vrev16q_s8(__a) \
  ((int8x16_t)__builtin_neon_vrev16v16qi (__a, 1))

#define vrev16q_u8(__a) \
  ((uint8x16_t)__builtin_neon_vrev16v16qi (__a, 0))

#define vrev16q_p8(__a) \
  ((poly8x16_t)__builtin_neon_vrev16v16qi (__a, 4))
/* vbsl_<type> (64-bit vector variants): forward (__a, __b, __c) to the
   __builtin_neon_vbsl<mode> builtin and cast the result to the
   variant's vector type.  No extra selector constant is passed;
   variants with the same element layout share one builtin and differ
   only in the result cast.
   Expansions are fully parenthesized so each is a single operand.  */
#define vbsl_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vbslv8qi (__a, __b, __c))

#define vbsl_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vbslv4hi (__a, __b, __c))

#define vbsl_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vbslv2si (__a, __b, __c))

#define vbsl_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vbslv1di (__a, __b, __c))

#define vbsl_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vbslv2sf (__a, __b, __c))

#define vbsl_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vbslv8qi (__a, __b, __c))

#define vbsl_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vbslv4hi (__a, __b, __c))

#define vbsl_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vbslv2si (__a, __b, __c))

#define vbsl_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vbslv1di (__a, __b, __c))

#define vbsl_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vbslv8qi (__a, __b, __c))

#define vbsl_p16(__a, __b, __c) \
  ((poly16x4_t)__builtin_neon_vbslv4hi (__a, __b, __c))
/* vbslq_<type> (128-bit vector variants): forward (__a, __b, __c) to
   the __builtin_neon_vbsl<mode> builtin and cast the result to the
   variant's vector type.  No extra selector constant is passed;
   variants with the same element layout share one builtin and differ
   only in the result cast.
   Expansions are fully parenthesized so each is a single operand.  */
#define vbslq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vbslv16qi (__a, __b, __c))

#define vbslq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vbslv8hi (__a, __b, __c))

#define vbslq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vbslv4si (__a, __b, __c))

#define vbslq_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vbslv2di (__a, __b, __c))

#define vbslq_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vbslv4sf (__a, __b, __c))

#define vbslq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vbslv16qi (__a, __b, __c))

#define vbslq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vbslv8hi (__a, __b, __c))

#define vbslq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vbslv4si (__a, __b, __c))

#define vbslq_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vbslv2di (__a, __b, __c))

#define vbslq_p8(__a, __b, __c) \
  ((poly8x16_t)__builtin_neon_vbslv16qi (__a, __b, __c))

#define vbslq_p16(__a, __b, __c) \
  ((poly16x8_t)__builtin_neon_vbslv8hi (__a, __b, __c))
/* vtrn_<type> (64-bit vector variants).  Each macro calls
   __builtin_neon_vtrn<mode> on (__a, __b), stores the builtin's
   aggregate result in the __o member of a local union, and yields the
   same storage read through the __i member, which has the public
   two-vector struct type (<type>x2_t).
   The body is a GNU statement expression: its value is the final
   "__rv.__i;" expression, and it must be closed with "})".  */
#define vtrn_s8(__a, __b) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8qi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_s16(__a, __b) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4hi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_s32(__a, __b) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv2si (__a, __b); \
     __rv.__i; \
   })

#define vtrn_f32(__a, __b) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv2sf (__a, __b); \
     __rv.__i; \
   })

#define vtrn_u8(__a, __b) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8qi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_u16(__a, __b) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4hi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_u32(__a, __b) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv2si (__a, __b); \
     __rv.__i; \
   })

#define vtrn_p8(__a, __b) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8qi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_p16(__a, __b) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4hi (__a, __b); \
     __rv.__i; \
   })
/* vtrnq_<type> (128-bit vector variants).  Each macro calls
   __builtin_neon_vtrn<mode> on (__a, __b), stores the builtin's
   aggregate result in the __o member of a local union, and yields the
   same storage read through the __i member, which has the public
   two-vector struct type (<type>x2_t).
   The body is a GNU statement expression: its value is the final
   "__rv.__i;" expression, and it must be closed with "})".  */
#define vtrnq_s8(__a, __b) \
  ({ \
     union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv16qi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_s16(__a, __b) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8hi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_s32(__a, __b) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4si (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_f32(__a, __b) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4sf (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_u8(__a, __b) \
  ({ \
     union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv16qi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_u16(__a, __b) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8hi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_u32(__a, __b) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4si (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_p8(__a, __b) \
  ({ \
     union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv16qi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_p16(__a, __b) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8hi (__a, __b); \
     __rv.__i; \
   })
/* vzip_<type> (64-bit vector variants).  Each macro calls
   __builtin_neon_vzip<mode> on (__a, __b), stores the builtin's
   aggregate result in the __o member of a local union, and yields the
   same storage read through the __i member, which has the public
   two-vector struct type (<type>x2_t).
   The body is a GNU statement expression: its value is the final
   "__rv.__i;" expression, and it must be closed with "})".  */
#define vzip_s8(__a, __b) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8qi (__a, __b); \
     __rv.__i; \
   })

#define vzip_s16(__a, __b) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4hi (__a, __b); \
     __rv.__i; \
   })

#define vzip_s32(__a, __b) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv2si (__a, __b); \
     __rv.__i; \
   })

#define vzip_f32(__a, __b) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv2sf (__a, __b); \
     __rv.__i; \
   })

#define vzip_u8(__a, __b) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8qi (__a, __b); \
     __rv.__i; \
   })

#define vzip_u16(__a, __b) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4hi (__a, __b); \
     __rv.__i; \
   })

#define vzip_u32(__a, __b) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv2si (__a, __b); \
     __rv.__i; \
   })

#define vzip_p8(__a, __b) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8qi (__a, __b); \
     __rv.__i; \
   })

#define vzip_p16(__a, __b) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4hi (__a, __b); \
     __rv.__i; \
   })
/* vzipq_<type> (128-bit vector variants).  Each macro calls
   __builtin_neon_vzip<mode> on (__a, __b), stores the builtin's
   aggregate result in the __o member of a local union, and yields the
   same storage read through the __i member, which has the public
   two-vector struct type (<type>x2_t).
   The body is a GNU statement expression: its value is the final
   "__rv.__i;" expression, and it must be closed with "})".  */
#define vzipq_s8(__a, __b) \
  ({ \
     union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv16qi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_s16(__a, __b) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8hi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_s32(__a, __b) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4si (__a, __b); \
     __rv.__i; \
   })

#define vzipq_f32(__a, __b) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4sf (__a, __b); \
     __rv.__i; \
   })

#define vzipq_u8(__a, __b) \
  ({ \
     union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv16qi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_u16(__a, __b) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8hi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_u32(__a, __b) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4si (__a, __b); \
     __rv.__i; \
   })

#define vzipq_p8(__a, __b) \
  ({ \
     union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv16qi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_p16(__a, __b) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8hi (__a, __b); \
     __rv.__i; \
   })
/* vuzp_<type> (64-bit vector variants).  Each macro calls
   __builtin_neon_vuzp<mode> on (__a, __b), stores the builtin's
   aggregate result in the __o member of a local union, and yields the
   same storage read through the __i member, which has the public
   two-vector struct type (<type>x2_t).
   The body is a GNU statement expression: its value is the final
   "__rv.__i;" expression, and it must be closed with "})".  */
#define vuzp_s8(__a, __b) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8qi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_s16(__a, __b) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4hi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_s32(__a, __b) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv2si (__a, __b); \
     __rv.__i; \
   })

#define vuzp_f32(__a, __b) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv2sf (__a, __b); \
     __rv.__i; \
   })

#define vuzp_u8(__a, __b) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8qi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_u16(__a, __b) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4hi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_u32(__a, __b) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv2si (__a, __b); \
     __rv.__i; \
   })

#define vuzp_p8(__a, __b) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8qi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_p16(__a, __b) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4hi (__a, __b); \
     __rv.__i; \
   })
/* vuzpq_<type> (128-bit vector variants).  Each macro calls
   __builtin_neon_vuzp<mode> on (__a, __b), stores the builtin's
   aggregate result in the __o member of a local union, and yields the
   same storage read through the __i member, which has the public
   two-vector struct type (<type>x2_t).
   The body is a GNU statement expression: its value is the final
   "__rv.__i;" expression, and it must be closed with "})".  */
#define vuzpq_s8(__a, __b) \
  ({ \
     union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv16qi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_s16(__a, __b) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8hi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_s32(__a, __b) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4si (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_f32(__a, __b) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4sf (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_u8(__a, __b) \
  ({ \
     union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv16qi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_u16(__a, __b) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8hi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_u32(__a, __b) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4si (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_p8(__a, __b) \
  ({ \
     union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv16qi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_p16(__a, __b) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8hi (__a, __b); \
     __rv.__i; \
   })
/* vld1_<type> (64-bit vector variants): forward __a to the
   __builtin_neon_vld1<mode> builtin and cast the result to the
   variant's vector type.  NOTE(review): __a is presumably a pointer to
   the element data to load -- the macro itself does not check or cast
   it; confirm against the builtin's prototype.
   Expansions are fully parenthesized so each is a single operand.  */
#define vld1_s8(__a) \
  ((int8x8_t)__builtin_neon_vld1v8qi (__a))

#define vld1_s16(__a) \
  ((int16x4_t)__builtin_neon_vld1v4hi (__a))

#define vld1_s32(__a) \
  ((int32x2_t)__builtin_neon_vld1v2si (__a))

#define vld1_s64(__a) \
  ((int64x1_t)__builtin_neon_vld1v1di (__a))

#define vld1_f32(__a) \
  ((float32x2_t)__builtin_neon_vld1v2sf (__a))

#define vld1_u8(__a) \
  ((uint8x8_t)__builtin_neon_vld1v8qi (__a))

#define vld1_u16(__a) \
  ((uint16x4_t)__builtin_neon_vld1v4hi (__a))

#define vld1_u32(__a) \
  ((uint32x2_t)__builtin_neon_vld1v2si (__a))

#define vld1_u64(__a) \
  ((uint64x1_t)__builtin_neon_vld1v1di (__a))

#define vld1_p8(__a) \
  ((poly8x8_t)__builtin_neon_vld1v8qi (__a))

#define vld1_p16(__a) \
  ((poly16x4_t)__builtin_neon_vld1v4hi (__a))
/* vld1q_<type> (__a): pass __a to __builtin_neon_vld1<mode> and cast the
   result to the 128-bit vector type named by the suffix.  Variants with the
   same element width share one builtin and differ only in the cast.  The
   whole expansion is parenthesized so the cast cannot be split by an
   operator that follows the macro call.  */
#define vld1q_s8(__a) \
  ((int8x16_t)__builtin_neon_vld1v16qi (__a))

#define vld1q_s16(__a) \
  ((int16x8_t)__builtin_neon_vld1v8hi (__a))

#define vld1q_s32(__a) \
  ((int32x4_t)__builtin_neon_vld1v4si (__a))

#define vld1q_s64(__a) \
  ((int64x2_t)__builtin_neon_vld1v2di (__a))

#define vld1q_f32(__a) \
  ((float32x4_t)__builtin_neon_vld1v4sf (__a))

#define vld1q_u8(__a) \
  ((uint8x16_t)__builtin_neon_vld1v16qi (__a))

#define vld1q_u16(__a) \
  ((uint16x8_t)__builtin_neon_vld1v8hi (__a))

#define vld1q_u32(__a) \
  ((uint32x4_t)__builtin_neon_vld1v4si (__a))

#define vld1q_u64(__a) \
  ((uint64x2_t)__builtin_neon_vld1v2di (__a))

#define vld1q_p8(__a) \
  ((poly8x16_t)__builtin_neon_vld1v16qi (__a))

#define vld1q_p16(__a) \
  ((poly16x8_t)__builtin_neon_vld1v8hi (__a))
/* vld1_lane_<type> (__a, __b, __c): forward all three arguments unchanged to
   __builtin_neon_vld1_lane<mode> and cast the result to the 64-bit vector
   type named by the suffix.
   NOTE(review): by NEON intrinsic convention __a is the source pointer, __b
   the vector being updated and __c a constant lane number -- confirm against
   the ARM reference.  */
#define vld1_lane_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vld1_lanev8qi (__a, __b, __c))

#define vld1_lane_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vld1_lanev4hi (__a, __b, __c))

#define vld1_lane_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vld1_lanev2si (__a, __b, __c))

#define vld1_lane_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c))

#define vld1_lane_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vld1_lanev8qi (__a, __b, __c))

#define vld1_lane_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vld1_lanev4hi (__a, __b, __c))

#define vld1_lane_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vld1_lanev2si (__a, __b, __c))

#define vld1_lane_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vld1_lanev8qi (__a, __b, __c))

#define vld1_lane_p16(__a, __b, __c) \
  ((poly16x4_t)__builtin_neon_vld1_lanev4hi (__a, __b, __c))

#define vld1_lane_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vld1_lanev1di (__a, __b, __c))

#define vld1_lane_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vld1_lanev1di (__a, __b, __c))
/* vld1q_lane_<type> (__a, __b, __c): forward all three arguments unchanged
   to __builtin_neon_vld1_lane<mode> and cast the result to the 128-bit
   vector type named by the suffix.
   NOTE(review): by NEON intrinsic convention __a is the source pointer, __b
   the vector being updated and __c a constant lane number -- confirm against
   the ARM reference.  */
#define vld1q_lane_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vld1_lanev16qi (__a, __b, __c))

#define vld1q_lane_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vld1_lanev8hi (__a, __b, __c))

#define vld1q_lane_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vld1_lanev4si (__a, __b, __c))

#define vld1q_lane_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c))

#define vld1q_lane_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vld1_lanev16qi (__a, __b, __c))

#define vld1q_lane_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vld1_lanev8hi (__a, __b, __c))

#define vld1q_lane_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vld1_lanev4si (__a, __b, __c))

#define vld1q_lane_p8(__a, __b, __c) \
  ((poly8x16_t)__builtin_neon_vld1_lanev16qi (__a, __b, __c))

#define vld1q_lane_p16(__a, __b, __c) \
  ((poly16x8_t)__builtin_neon_vld1_lanev8hi (__a, __b, __c))

#define vld1q_lane_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vld1_lanev2di (__a, __b, __c))

#define vld1q_lane_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vld1_lanev2di (__a, __b, __c))
/* vld1_dup_<type> (__a): pass __a to __builtin_neon_vld1_dup<mode> and cast
   the result to the 64-bit vector type named by the suffix.  The whole
   expansion is parenthesized so the cast cannot be split by an operator that
   follows the macro call.  */
#define vld1_dup_s8(__a) \
  ((int8x8_t)__builtin_neon_vld1_dupv8qi (__a))

#define vld1_dup_s16(__a) \
  ((int16x4_t)__builtin_neon_vld1_dupv4hi (__a))

#define vld1_dup_s32(__a) \
  ((int32x2_t)__builtin_neon_vld1_dupv2si (__a))

#define vld1_dup_f32(__a) \
  ((float32x2_t)__builtin_neon_vld1_dupv2sf (__a))

#define vld1_dup_u8(__a) \
  ((uint8x8_t)__builtin_neon_vld1_dupv8qi (__a))

#define vld1_dup_u16(__a) \
  ((uint16x4_t)__builtin_neon_vld1_dupv4hi (__a))

#define vld1_dup_u32(__a) \
  ((uint32x2_t)__builtin_neon_vld1_dupv2si (__a))

#define vld1_dup_p8(__a) \
  ((poly8x8_t)__builtin_neon_vld1_dupv8qi (__a))

#define vld1_dup_p16(__a) \
  ((poly16x4_t)__builtin_neon_vld1_dupv4hi (__a))

#define vld1_dup_s64(__a) \
  ((int64x1_t)__builtin_neon_vld1_dupv1di (__a))

#define vld1_dup_u64(__a) \
  ((uint64x1_t)__builtin_neon_vld1_dupv1di (__a))
/* vld1q_dup_<type> (__a): pass __a to __builtin_neon_vld1_dup<mode> and cast
   the result to the 128-bit vector type named by the suffix.  The whole
   expansion is parenthesized so the cast cannot be split by an operator that
   follows the macro call.  */
#define vld1q_dup_s8(__a) \
  ((int8x16_t)__builtin_neon_vld1_dupv16qi (__a))

#define vld1q_dup_s16(__a) \
  ((int16x8_t)__builtin_neon_vld1_dupv8hi (__a))

#define vld1q_dup_s32(__a) \
  ((int32x4_t)__builtin_neon_vld1_dupv4si (__a))

#define vld1q_dup_f32(__a) \
  ((float32x4_t)__builtin_neon_vld1_dupv4sf (__a))

#define vld1q_dup_u8(__a) \
  ((uint8x16_t)__builtin_neon_vld1_dupv16qi (__a))

#define vld1q_dup_u16(__a) \
  ((uint16x8_t)__builtin_neon_vld1_dupv8hi (__a))

#define vld1q_dup_u32(__a) \
  ((uint32x4_t)__builtin_neon_vld1_dupv4si (__a))

#define vld1q_dup_p8(__a) \
  ((poly8x16_t)__builtin_neon_vld1_dupv16qi (__a))

#define vld1q_dup_p16(__a) \
  ((poly16x8_t)__builtin_neon_vld1_dupv8hi (__a))

#define vld1q_dup_s64(__a) \
  ((int64x2_t)__builtin_neon_vld1_dupv2di (__a))

#define vld1q_dup_u64(__a) \
  ((uint64x2_t)__builtin_neon_vld1_dupv2di (__a))
/* vst1_<type> (__a, __b): forward both arguments unchanged to
   __builtin_neon_vst1<mode>.  No cast is applied; variants with the same
   element width expand to the same builtin call.
   NOTE(review): by NEON intrinsic convention __a is the destination pointer
   and __b the 64-bit vector to store -- confirm against the ARM reference.  */
#define vst1_s8(__a, __b) \
  __builtin_neon_vst1v8qi (__a, __b)

#define vst1_s16(__a, __b) \
  __builtin_neon_vst1v4hi (__a, __b)

#define vst1_s32(__a, __b) \
  __builtin_neon_vst1v2si (__a, __b)

#define vst1_s64(__a, __b) \
  __builtin_neon_vst1v1di (__a, __b)

#define vst1_f32(__a, __b) \
  __builtin_neon_vst1v2sf (__a, __b)

#define vst1_u8(__a, __b) \
  __builtin_neon_vst1v8qi (__a, __b)

#define vst1_u16(__a, __b) \
  __builtin_neon_vst1v4hi (__a, __b)

#define vst1_u32(__a, __b) \
  __builtin_neon_vst1v2si (__a, __b)

#define vst1_u64(__a, __b) \
  __builtin_neon_vst1v1di (__a, __b)

#define vst1_p8(__a, __b) \
  __builtin_neon_vst1v8qi (__a, __b)

#define vst1_p16(__a, __b) \
  __builtin_neon_vst1v4hi (__a, __b)
/* vst1q_<type> (__a, __b): forward both arguments unchanged to
   __builtin_neon_vst1<mode>.  No cast is applied; variants with the same
   element width expand to the same builtin call.
   NOTE(review): by NEON intrinsic convention __a is the destination pointer
   and __b the 128-bit vector to store -- confirm against the ARM
   reference.  */
#define vst1q_s8(__a, __b) \
  __builtin_neon_vst1v16qi (__a, __b)

#define vst1q_s16(__a, __b) \
  __builtin_neon_vst1v8hi (__a, __b)

#define vst1q_s32(__a, __b) \
  __builtin_neon_vst1v4si (__a, __b)

#define vst1q_s64(__a, __b) \
  __builtin_neon_vst1v2di (__a, __b)

#define vst1q_f32(__a, __b) \
  __builtin_neon_vst1v4sf (__a, __b)

#define vst1q_u8(__a, __b) \
  __builtin_neon_vst1v16qi (__a, __b)

#define vst1q_u16(__a, __b) \
  __builtin_neon_vst1v8hi (__a, __b)

#define vst1q_u32(__a, __b) \
  __builtin_neon_vst1v4si (__a, __b)

#define vst1q_u64(__a, __b) \
  __builtin_neon_vst1v2di (__a, __b)

#define vst1q_p8(__a, __b) \
  __builtin_neon_vst1v16qi (__a, __b)

#define vst1q_p16(__a, __b) \
  __builtin_neon_vst1v8hi (__a, __b)
/* vst1_lane_<type> (__a, __b, __c): forward all three arguments unchanged to
   __builtin_neon_vst1_lane<mode>.  No cast is applied; variants with the
   same element width expand to the same builtin call.
   NOTE(review): by NEON intrinsic convention __a is the destination pointer,
   __b the 64-bit source vector and __c a constant lane number -- confirm
   against the ARM reference.  */
#define vst1_lane_s8(__a, __b, __c) \
  __builtin_neon_vst1_lanev8qi (__a, __b, __c)

#define vst1_lane_s16(__a, __b, __c) \
  __builtin_neon_vst1_lanev4hi (__a, __b, __c)

#define vst1_lane_s32(__a, __b, __c) \
  __builtin_neon_vst1_lanev2si (__a, __b, __c)

#define vst1_lane_f32(__a, __b, __c) \
  __builtin_neon_vst1_lanev2sf (__a, __b, __c)

#define vst1_lane_u8(__a, __b, __c) \
  __builtin_neon_vst1_lanev8qi (__a, __b, __c)

#define vst1_lane_u16(__a, __b, __c) \
  __builtin_neon_vst1_lanev4hi (__a, __b, __c)

#define vst1_lane_u32(__a, __b, __c) \
  __builtin_neon_vst1_lanev2si (__a, __b, __c)

#define vst1_lane_p8(__a, __b, __c) \
  __builtin_neon_vst1_lanev8qi (__a, __b, __c)

#define vst1_lane_p16(__a, __b, __c) \
  __builtin_neon_vst1_lanev4hi (__a, __b, __c)

#define vst1_lane_s64(__a, __b, __c) \
  __builtin_neon_vst1_lanev1di (__a, __b, __c)

#define vst1_lane_u64(__a, __b, __c) \
  __builtin_neon_vst1_lanev1di (__a, __b, __c)
/* vst1q_lane_<type> (__a, __b, __c): forward all three arguments unchanged
   to __builtin_neon_vst1_lane<mode>.  No cast is applied; variants with the
   same element width expand to the same builtin call.
   NOTE(review): by NEON intrinsic convention __a is the destination pointer,
   __b the 128-bit source vector and __c a constant lane number -- confirm
   against the ARM reference.  */
#define vst1q_lane_s8(__a, __b, __c) \
  __builtin_neon_vst1_lanev16qi (__a, __b, __c)

#define vst1q_lane_s16(__a, __b, __c) \
  __builtin_neon_vst1_lanev8hi (__a, __b, __c)

#define vst1q_lane_s32(__a, __b, __c) \
  __builtin_neon_vst1_lanev4si (__a, __b, __c)

#define vst1q_lane_f32(__a, __b, __c) \
  __builtin_neon_vst1_lanev4sf (__a, __b, __c)

#define vst1q_lane_u8(__a, __b, __c) \
  __builtin_neon_vst1_lanev16qi (__a, __b, __c)

#define vst1q_lane_u16(__a, __b, __c) \
  __builtin_neon_vst1_lanev8hi (__a, __b, __c)

#define vst1q_lane_u32(__a, __b, __c) \
  __builtin_neon_vst1_lanev4si (__a, __b, __c)

#define vst1q_lane_p8(__a, __b, __c) \
  __builtin_neon_vst1_lanev16qi (__a, __b, __c)

#define vst1q_lane_p16(__a, __b, __c) \
  __builtin_neon_vst1_lanev8hi (__a, __b, __c)

#define vst1q_lane_s64(__a, __b, __c) \
  __builtin_neon_vst1_lanev2di (__a, __b, __c)

#define vst1q_lane_u64(__a, __b, __c) \
  __builtin_neon_vst1_lanev2di (__a, __b, __c)
/* vld2_<type> (__a): pass __a to __builtin_neon_vld2<mode> and return the
   result as the public <type>x2_t aggregate.  The builtin's result is stored
   in the __builtin_neon_<mode>2 member of a union and read back through the
   public-typed member.  Each macro is a GNU statement expression whose value
   is __rv.__i.  */
#define vld2_s8(__a) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v8qi (__a); \
     __rv.__i; \
   })

#define vld2_s16(__a) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v4hi (__a); \
     __rv.__i; \
   })

#define vld2_s32(__a) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v2si (__a); \
     __rv.__i; \
   })

#define vld2_f32(__a) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v2sf (__a); \
     __rv.__i; \
   })

#define vld2_u8(__a) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v8qi (__a); \
     __rv.__i; \
   })

#define vld2_u16(__a) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v4hi (__a); \
     __rv.__i; \
   })

#define vld2_u32(__a) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v2si (__a); \
     __rv.__i; \
   })

#define vld2_p8(__a) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v8qi (__a); \
     __rv.__i; \
   })

#define vld2_p16(__a) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v4hi (__a); \
     __rv.__i; \
   })

#define vld2_s64(__a) \
  ({ \
     union { int64x1x2_t __i; __builtin_neon_v1di2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v1di (__a); \
     __rv.__i; \
   })

#define vld2_u64(__a) \
  ({ \
     union { uint64x1x2_t __i; __builtin_neon_v1di2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v1di (__a); \
     __rv.__i; \
   })
/* vld2q_<type> (__a): pass __a to __builtin_neon_vld2<mode> and return the
   result as the public <type>x2_t aggregate of 128-bit vectors.  The
   builtin's result is stored in the __builtin_neon_<mode>2 member of a union
   and read back through the public-typed member.  Each macro is a GNU
   statement expression whose value is __rv.__i.  */
#define vld2q_s8(__a) \
  ({ \
     union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v16qi (__a); \
     __rv.__i; \
   })

#define vld2q_s16(__a) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v8hi (__a); \
     __rv.__i; \
   })

#define vld2q_s32(__a) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v4si (__a); \
     __rv.__i; \
   })

#define vld2q_f32(__a) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v4sf (__a); \
     __rv.__i; \
   })

#define vld2q_u8(__a) \
  ({ \
     union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v16qi (__a); \
     __rv.__i; \
   })

#define vld2q_u16(__a) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v8hi (__a); \
     __rv.__i; \
   })

#define vld2q_u32(__a) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v4si (__a); \
     __rv.__i; \
   })

#define vld2q_p8(__a) \
  ({ \
     union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v16qi (__a); \
     __rv.__i; \
   })

#define vld2q_p16(__a) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2v8hi (__a); \
     __rv.__i; \
   })
/* vld2_lane_<type> (__a, __b, __c): call __builtin_neon_vld2_lane<mode> and
   return the result as the public <type>x2_t aggregate.
   __b is a public <type>x2_t value; it initializes the first member of the
   union __bu so that the builtin can be given the __builtin_neon_<mode>2
   view (__bu.__o).  __a and __c are forwarded unchanged.  The result goes
   through a second union, __rv, in the opposite direction.  Each macro is a
   GNU statement expression whose value is __rv.__i.  */
#define vld2_lane_s8(__a, __b, __c) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev2si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __bu = { __b }; \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2_lane_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev2si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2_lane_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })
/* vld2q_lane_<type> (__a, __b, __c): call __builtin_neon_vld2_lane<mode> and
   return the result as the public <type>x2_t aggregate of 128-bit vectors.
   __b initializes the first member of the union __bu so that the builtin can
   be given the __builtin_neon_<mode>2 view (__bu.__o); __a and __c are
   forwarded unchanged.  The result goes through a second union, __rv, in the
   opposite direction.  Only 16- and 32-bit element variants are defined
   here.  */
#define vld2q_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2q_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev4si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2q_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __bu = { __b }; \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2q_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2q_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev4si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld2q_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })
/* vld2_dup_<type> (__a): pass __a to __builtin_neon_vld2_dup<mode> and
   return the result as the public <type>x2_t aggregate.  The builtin's
   result is stored in the __builtin_neon_<mode>2 member of a union and read
   back through the public-typed member.  Each macro is a GNU statement
   expression whose value is __rv.__i.  */
#define vld2_dup_s8(__a) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld2_dup_s16(__a) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld2_dup_s32(__a) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv2si (__a); \
     __rv.__i; \
   })

#define vld2_dup_f32(__a) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv2sf (__a); \
     __rv.__i; \
   })

#define vld2_dup_u8(__a) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld2_dup_u16(__a) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld2_dup_u32(__a) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv2si (__a); \
     __rv.__i; \
   })

#define vld2_dup_p8(__a) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld2_dup_p16(__a) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld2_dup_s64(__a) \
  ({ \
     union { int64x1x2_t __i; __builtin_neon_v1di2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv1di (__a); \
     __rv.__i; \
   })

#define vld2_dup_u64(__a) \
  ({ \
     union { uint64x1x2_t __i; __builtin_neon_v1di2 __o; } __rv; \
     __rv.__o = __builtin_neon_vld2_dupv1di (__a); \
     __rv.__i; \
   })
/* vst2_<type> (__a, __b): call __builtin_neon_vst2<mode> with __a and the
   __builtin_neon_<mode>2 view of __b.  __b is a public <type>x2_t value; it
   initializes the first member of the union __bu and is read back through
   __bu.__o.  The builtin call is the last statement of the GNU statement
   expression.  */
#define vst2_s8(__a, __b) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v8qi (__a, __bu.__o); \
   })

#define vst2_s16(__a, __b) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v4hi (__a, __bu.__o); \
   })

#define vst2_s32(__a, __b) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v2si (__a, __bu.__o); \
   })

#define vst2_f32(__a, __b) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v2sf (__a, __bu.__o); \
   })

#define vst2_u8(__a, __b) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v8qi (__a, __bu.__o); \
   })

#define vst2_u16(__a, __b) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v4hi (__a, __bu.__o); \
   })

#define vst2_u32(__a, __b) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v2si (__a, __bu.__o); \
   })

#define vst2_p8(__a, __b) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v8qi (__a, __bu.__o); \
   })

#define vst2_p16(__a, __b) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v4hi (__a, __bu.__o); \
   })

#define vst2_s64(__a, __b) \
  ({ \
     union { int64x1x2_t __i; __builtin_neon_v1di2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v1di (__a, __bu.__o); \
   })

#define vst2_u64(__a, __b) \
  ({ \
     union { uint64x1x2_t __i; __builtin_neon_v1di2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v1di (__a, __bu.__o); \
   })
/* vst2q_<type> (__a, __b): call __builtin_neon_vst2<mode> with __a and the
   __builtin_neon_<mode>2 view of __b.  __b is a public <type>x2_t value of
   128-bit vectors; it initializes the first member of the union __bu and is
   read back through __bu.__o.  The builtin call is the last statement of the
   GNU statement expression.  */
#define vst2q_s8(__a, __b) \
  ({ \
     union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v16qi (__a, __bu.__o); \
   })

#define vst2q_s16(__a, __b) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v8hi (__a, __bu.__o); \
   })

#define vst2q_s32(__a, __b) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v4si (__a, __bu.__o); \
   })

#define vst2q_f32(__a, __b) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v4sf (__a, __bu.__o); \
   })

#define vst2q_u8(__a, __b) \
  ({ \
     union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v16qi (__a, __bu.__o); \
   })

#define vst2q_u16(__a, __b) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v8hi (__a, __bu.__o); \
   })

#define vst2q_u32(__a, __b) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v4si (__a, __bu.__o); \
   })

#define vst2q_p8(__a, __b) \
  ({ \
     union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v16qi (__a, __bu.__o); \
   })

#define vst2q_p16(__a, __b) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2v8hi (__a, __bu.__o); \
   })
/* vst2_lane_<type> (__a, __b, __c): call __builtin_neon_vst2_lane<mode> with
   __a, the __builtin_neon_<mode>2 view of __b, and __c.  __b is a public
   <type>x2_t value; it initializes the first member of the union __bu and is
   read back through __bu.__o.  The builtin call is the last statement of the
   GNU statement expression.  */
#define vst2_lane_s8(__a, __b, __c) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst2_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev4hi (__a, __bu.__o, __c); \
   })

#define vst2_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev2si (__a, __bu.__o, __c); \
   })

#define vst2_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c); \
   })

#define vst2_lane_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst2_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev4hi (__a, __bu.__o, __c); \
   })

#define vst2_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev2si (__a, __bu.__o, __c); \
   })

#define vst2_lane_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst2_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev4hi (__a, __bu.__o, __c); \
   })
/* vst2q_lane_<type> (__a, __b, __c): call __builtin_neon_vst2_lane<mode>
   with __a, the __builtin_neon_<mode>2 view of __b, and __c.  __b is a
   public <type>x2_t value of 128-bit vectors; it initializes the first
   member of the union __bu and is read back through __bu.__o.  Only 16- and
   32-bit element variants are defined here.  */
#define vst2q_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev8hi (__a, __bu.__o, __c); \
   })

#define vst2q_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev4si (__a, __bu.__o, __c); \
   })

#define vst2q_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c); \
   })

#define vst2q_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev8hi (__a, __bu.__o, __c); \
   })

#define vst2q_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev4si (__a, __bu.__o, __c); \
   })

#define vst2q_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
     __builtin_neon_vst2_lanev8hi (__a, __bu.__o, __c); \
   })
/* vld3_<type> (__a): pass __a to __builtin_neon_vld3<mode> and return the
   result as the public <type>x3_t aggregate.  The builtin's result is stored
   in the __builtin_neon_<mode>3 member of a union and read back through the
   public-typed member.  Each macro is a GNU statement expression whose value
   is __rv.__i.  */
#define vld3_s8(__a) \
  ({ \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v8qi (__a); \
     __rv.__i; \
   })

#define vld3_s16(__a) \
  ({ \
     union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v4hi (__a); \
     __rv.__i; \
   })

#define vld3_s32(__a) \
  ({ \
     union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v2si (__a); \
     __rv.__i; \
   })

#define vld3_f32(__a) \
  ({ \
     union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v2sf (__a); \
     __rv.__i; \
   })

#define vld3_u8(__a) \
  ({ \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v8qi (__a); \
     __rv.__i; \
   })

#define vld3_u16(__a) \
  ({ \
     union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v4hi (__a); \
     __rv.__i; \
   })

#define vld3_u32(__a) \
  ({ \
     union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v2si (__a); \
     __rv.__i; \
   })

#define vld3_p8(__a) \
  ({ \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v8qi (__a); \
     __rv.__i; \
   })

#define vld3_p16(__a) \
  ({ \
     union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v4hi (__a); \
     __rv.__i; \
   })

#define vld3_s64(__a) \
  ({ \
     union { int64x1x3_t __i; __builtin_neon_v1di3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v1di (__a); \
     __rv.__i; \
   })

#define vld3_u64(__a) \
  ({ \
     union { uint64x1x3_t __i; __builtin_neon_v1di3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v1di (__a); \
     __rv.__i; \
   })
/* vld3q_<type> (__a): pass __a to __builtin_neon_vld3<mode> and return the
   result as the public <type>x3_t aggregate of 128-bit vectors.  The
   builtin's result is stored in the __builtin_neon_<mode>3 member of a union
   and read back through the public-typed member.  Each macro is a GNU
   statement expression whose value is __rv.__i.  */
#define vld3q_s8(__a) \
  ({ \
     union { int8x16x3_t __i; __builtin_neon_v16qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v16qi (__a); \
     __rv.__i; \
   })

#define vld3q_s16(__a) \
  ({ \
     union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v8hi (__a); \
     __rv.__i; \
   })

#define vld3q_s32(__a) \
  ({ \
     union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v4si (__a); \
     __rv.__i; \
   })

#define vld3q_f32(__a) \
  ({ \
     union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v4sf (__a); \
     __rv.__i; \
   })

#define vld3q_u8(__a) \
  ({ \
     union { uint8x16x3_t __i; __builtin_neon_v16qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v16qi (__a); \
     __rv.__i; \
   })

#define vld3q_u16(__a) \
  ({ \
     union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v8hi (__a); \
     __rv.__i; \
   })

#define vld3q_u32(__a) \
  ({ \
     union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v4si (__a); \
     __rv.__i; \
   })

#define vld3q_p8(__a) \
  ({ \
     union { poly8x16x3_t __i; __builtin_neon_v16qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v16qi (__a); \
     __rv.__i; \
   })

#define vld3q_p16(__a) \
  ({ \
     union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3v8hi (__a); \
     __rv.__i; \
   })
/* vld3_lane_<type> (__a, __b, __c): call __builtin_neon_vld3_lane<mode> and
   return the result as the public <type>x3_t aggregate.
   __b is a public <type>x3_t value; it initializes the first member of the
   union __bu so that the builtin can be given the __builtin_neon_<mode>3
   view (__bu.__o).  __a and __c are forwarded unchanged.  The result goes
   through a second union, __rv, in the opposite direction.  Each macro is a
   GNU statement expression whose value is __rv.__i.  */
#define vld3_lane_s8(__a, __b, __c) \
  ({ \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
     union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
     union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev2si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __bu = { __b }; \
     union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3_lane_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
     union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
     union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev2si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3_lane_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
     union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })
/* vld3q_lane_<type> (__a, __b, __c): call __builtin_neon_vld3_lane<mode> and
   return the result as the public <type>x3_t aggregate of 128-bit vectors.
   __b initializes the first member of the union __bu so that the builtin can
   be given the __builtin_neon_<mode>3 view (__bu.__o); __a and __c are
   forwarded unchanged.  The result goes through a second union, __rv, in the
   opposite direction.  Only 16- and 32-bit element variants are defined
   here.  */
#define vld3q_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
     union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3q_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
     union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev4si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3q_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __bu = { __b }; \
     union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3q_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
     union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3q_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
     union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev4si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld3q_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
     union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })
/* vld3_dup_*: wrappers around the __builtin_neon_vld3_dup* builtins.
   The builtin's opaque three-vector result is stored into one member of
   a union and read back through the other member as the public xN_t
   struct.  Each statement expression is closed with "})".  */

#define vld3_dup_s8(__a) \
  ({ \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld3_dup_s16(__a) \
  ({ \
     union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld3_dup_s32(__a) \
  ({ \
     union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv2si (__a); \
     __rv.__i; \
   })

#define vld3_dup_f32(__a) \
  ({ \
     union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv2sf (__a); \
     __rv.__i; \
   })

#define vld3_dup_u8(__a) \
  ({ \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld3_dup_u16(__a) \
  ({ \
     union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld3_dup_u32(__a) \
  ({ \
     union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv2si (__a); \
     __rv.__i; \
   })

#define vld3_dup_p8(__a) \
  ({ \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld3_dup_p16(__a) \
  ({ \
     union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld3_dup_s64(__a) \
  ({ \
     union { int64x1x3_t __i; __builtin_neon_v1di3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv1di (__a); \
     __rv.__i; \
   })

#define vld3_dup_u64(__a) \
  ({ \
     union { uint64x1x3_t __i; __builtin_neon_v1di3 __o; } __rv; \
     __rv.__o = __builtin_neon_vld3_dupv1di (__a); \
     __rv.__i; \
   })
/* vst3_*: wrappers around the __builtin_neon_vst3* builtins.  __b is
   viewed as the builtin's opaque three-vector type through a union and
   passed on together with __a.  The macro's value is whatever the
   builtin call yields.  Each statement expression is closed with "})".  */

#define vst3_s8(__a, __b) \
  ({ \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v8qi (__a, __bu.__o); \
   })

#define vst3_s16(__a, __b) \
  ({ \
     union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v4hi (__a, __bu.__o); \
   })

#define vst3_s32(__a, __b) \
  ({ \
     union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v2si (__a, __bu.__o); \
   })

#define vst3_f32(__a, __b) \
  ({ \
     union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v2sf (__a, __bu.__o); \
   })

#define vst3_u8(__a, __b) \
  ({ \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v8qi (__a, __bu.__o); \
   })

#define vst3_u16(__a, __b) \
  ({ \
     union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v4hi (__a, __bu.__o); \
   })

#define vst3_u32(__a, __b) \
  ({ \
     union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v2si (__a, __bu.__o); \
   })

#define vst3_p8(__a, __b) \
  ({ \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v8qi (__a, __bu.__o); \
   })

#define vst3_p16(__a, __b) \
  ({ \
     union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v4hi (__a, __bu.__o); \
   })

#define vst3_s64(__a, __b) \
  ({ \
     union { int64x1x3_t __i; __builtin_neon_v1di3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v1di (__a, __bu.__o); \
   })

#define vst3_u64(__a, __b) \
  ({ \
     union { uint64x1x3_t __i; __builtin_neon_v1di3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v1di (__a, __bu.__o); \
   })
/* vst3q_*: wrappers around the __builtin_neon_vst3* builtins for the
   16-byte vector modes.  __b is viewed as the builtin's opaque
   three-vector type through a union and passed on together with __a.
   Each statement expression is closed with "})".  */

#define vst3q_s8(__a, __b) \
  ({ \
     union { int8x16x3_t __i; __builtin_neon_v16qi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v16qi (__a, __bu.__o); \
   })

#define vst3q_s16(__a, __b) \
  ({ \
     union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v8hi (__a, __bu.__o); \
   })

#define vst3q_s32(__a, __b) \
  ({ \
     union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v4si (__a, __bu.__o); \
   })

#define vst3q_f32(__a, __b) \
  ({ \
     union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v4sf (__a, __bu.__o); \
   })

#define vst3q_u8(__a, __b) \
  ({ \
     union { uint8x16x3_t __i; __builtin_neon_v16qi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v16qi (__a, __bu.__o); \
   })

#define vst3q_u16(__a, __b) \
  ({ \
     union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v8hi (__a, __bu.__o); \
   })

#define vst3q_u32(__a, __b) \
  ({ \
     union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v4si (__a, __bu.__o); \
   })

#define vst3q_p8(__a, __b) \
  ({ \
     union { poly8x16x3_t __i; __builtin_neon_v16qi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v16qi (__a, __bu.__o); \
   })

#define vst3q_p16(__a, __b) \
  ({ \
     union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3v8hi (__a, __bu.__o); \
   })
/* vst3_lane_*: wrappers around the __builtin_neon_vst3_lane* builtins.
   __b is viewed as the builtin's opaque three-vector type through a
   union and passed on together with __a and __c.  Each statement
   expression is closed with "})".  */

#define vst3_lane_s8(__a, __b, __c) \
  ({ \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst3_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev4hi (__a, __bu.__o, __c); \
   })

#define vst3_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev2si (__a, __bu.__o, __c); \
   })

#define vst3_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c); \
   })

#define vst3_lane_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst3_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev4hi (__a, __bu.__o, __c); \
   })

#define vst3_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev2si (__a, __bu.__o, __c); \
   })

#define vst3_lane_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst3_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev4hi (__a, __bu.__o, __c); \
   })
/* vst3q_lane_*: wrappers around the __builtin_neon_vst3_lane* builtins
   for the 16-byte vector modes.  __b is viewed as the builtin's opaque
   three-vector type through a union and passed on together with __a and
   __c.  Each statement expression is closed with "})".  */

#define vst3q_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev8hi (__a, __bu.__o, __c); \
   })

#define vst3q_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev4si (__a, __bu.__o, __c); \
   })

#define vst3q_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c); \
   })

#define vst3q_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev8hi (__a, __bu.__o, __c); \
   })

#define vst3q_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev4si (__a, __bu.__o, __c); \
   })

#define vst3q_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
     __builtin_neon_vst3_lanev8hi (__a, __bu.__o, __c); \
   })
/* vld4_*: wrappers around the __builtin_neon_vld4* builtins.  The
   builtin's opaque four-vector result is stored into one member of a
   union and read back through the other member as the public xN_t
   struct.  Each statement expression is closed with "})".  */

#define vld4_s8(__a) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8qi (__a); \
     __rv.__i; \
   })

#define vld4_s16(__a) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4hi (__a); \
     __rv.__i; \
   })

#define vld4_s32(__a) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v2si (__a); \
     __rv.__i; \
   })

#define vld4_f32(__a) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v2sf (__a); \
     __rv.__i; \
   })

#define vld4_u8(__a) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8qi (__a); \
     __rv.__i; \
   })

#define vld4_u16(__a) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4hi (__a); \
     __rv.__i; \
   })

#define vld4_u32(__a) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v2si (__a); \
     __rv.__i; \
   })

#define vld4_p8(__a) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8qi (__a); \
     __rv.__i; \
   })

#define vld4_p16(__a) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4hi (__a); \
     __rv.__i; \
   })

#define vld4_s64(__a) \
  ({ \
     union { int64x1x4_t __i; __builtin_neon_v1di4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v1di (__a); \
     __rv.__i; \
   })

#define vld4_u64(__a) \
  ({ \
     union { uint64x1x4_t __i; __builtin_neon_v1di4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v1di (__a); \
     __rv.__i; \
   })
/* vld4q_*: wrappers around the __builtin_neon_vld4* builtins for the
   16-byte vector modes.  The builtin's opaque four-vector result is
   stored into one member of a union and read back through the other
   member as the public xN_t struct.  Each statement expression is closed
   with "})".  */

#define vld4q_s8(__a) \
  ({ \
     union { int8x16x4_t __i; __builtin_neon_v16qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v16qi (__a); \
     __rv.__i; \
   })

#define vld4q_s16(__a) \
  ({ \
     union { int16x8x4_t __i; __builtin_neon_v8hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8hi (__a); \
     __rv.__i; \
   })

#define vld4q_s32(__a) \
  ({ \
     union { int32x4x4_t __i; __builtin_neon_v4si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4si (__a); \
     __rv.__i; \
   })

#define vld4q_f32(__a) \
  ({ \
     union { float32x4x4_t __i; __builtin_neon_v4sf4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4sf (__a); \
     __rv.__i; \
   })

#define vld4q_u8(__a) \
  ({ \
     union { uint8x16x4_t __i; __builtin_neon_v16qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v16qi (__a); \
     __rv.__i; \
   })

#define vld4q_u16(__a) \
  ({ \
     union { uint16x8x4_t __i; __builtin_neon_v8hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8hi (__a); \
     __rv.__i; \
   })

#define vld4q_u32(__a) \
  ({ \
     union { uint32x4x4_t __i; __builtin_neon_v4si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4si (__a); \
     __rv.__i; \
   })

#define vld4q_p8(__a) \
  ({ \
     union { poly8x16x4_t __i; __builtin_neon_v16qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v16qi (__a); \
     __rv.__i; \
   })

#define vld4q_p16(__a) \
  ({ \
     union { poly16x8x4_t __i; __builtin_neon_v8hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8hi (__a); \
     __rv.__i; \
   })
/* vld4_lane_*: wrappers around the __builtin_neon_vld4_lane* builtins.
   __b is viewed as the builtin's opaque four-vector type through a
   union, the builtin is called with (__a, that view, __c), and its
   result is handed back through the same union type as the public xN_t
   struct.  Each statement expression is closed with "})".  */

#define vld4_lane_s8(__a, __b, __c) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev2si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev2si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })
/* vld4q_lane_*: wrappers around the __builtin_neon_vld4_lane* builtins
   for the 16-byte vector modes.  __b is viewed as the builtin's opaque
   four-vector type through a union, the builtin is called with
   (__a, that view, __c), and its result is handed back through the same
   union type as the public xN_t struct.  Each statement expression is
   closed with "})".  */

#define vld4q_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x4x4_t __i; __builtin_neon_v4sf4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }, __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })
/* vld4_dup_*: wrappers around the __builtin_neon_vld4_dup* builtins.
   The builtin's opaque four-vector result is stored into one member of
   a union and read back through the other member as the public xN_t
   struct.  Each statement expression is closed with "})".  */

#define vld4_dup_s8(__a) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld4_dup_s16(__a) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld4_dup_s32(__a) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv2si (__a); \
     __rv.__i; \
   })

#define vld4_dup_f32(__a) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv2sf (__a); \
     __rv.__i; \
   })

#define vld4_dup_u8(__a) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld4_dup_u16(__a) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld4_dup_u32(__a) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv2si (__a); \
     __rv.__i; \
   })

#define vld4_dup_p8(__a) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld4_dup_p16(__a) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld4_dup_s64(__a) \
  ({ \
     union { int64x1x4_t __i; __builtin_neon_v1di4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv1di (__a); \
     __rv.__i; \
   })

#define vld4_dup_u64(__a) \
  ({ \
     union { uint64x1x4_t __i; __builtin_neon_v1di4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv1di (__a); \
     __rv.__i; \
   })
/* vst4_*: wrappers around the __builtin_neon_vst4* builtins.  __b is
   viewed as the builtin's opaque four-vector type through a union and
   passed on together with __a.  The macro's value is whatever the
   builtin call yields.  Each statement expression is closed with "})".  */

#define vst4_s8(__a, __b) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8qi (__a, __bu.__o); \
   })

#define vst4_s16(__a, __b) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4hi (__a, __bu.__o); \
   })

#define vst4_s32(__a, __b) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v2si (__a, __bu.__o); \
   })

#define vst4_f32(__a, __b) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v2sf (__a, __bu.__o); \
   })

#define vst4_u8(__a, __b) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8qi (__a, __bu.__o); \
   })

#define vst4_u16(__a, __b) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4hi (__a, __bu.__o); \
   })

#define vst4_u32(__a, __b) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v2si (__a, __bu.__o); \
   })

#define vst4_p8(__a, __b) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8qi (__a, __bu.__o); \
   })

#define vst4_p16(__a, __b) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4hi (__a, __bu.__o); \
   })

#define vst4_s64(__a, __b) \
  ({ \
     union { int64x1x4_t __i; __builtin_neon_v1di4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v1di (__a, __bu.__o); \
   })

#define vst4_u64(__a, __b) \
  ({ \
     union { uint64x1x4_t __i; __builtin_neon_v1di4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v1di (__a, __bu.__o); \
   })
/* vst4q_*: wrappers around the __builtin_neon_vst4* builtins for the
   16-byte vector modes.  __b is viewed as the builtin's opaque
   four-vector type through a union and passed on together with __a.
   Each statement expression is closed with "})".  */

#define vst4q_s8(__a, __b) \
  ({ \
     union { int8x16x4_t __i; __builtin_neon_v16qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v16qi (__a, __bu.__o); \
   })

#define vst4q_s16(__a, __b) \
  ({ \
     union { int16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8hi (__a, __bu.__o); \
   })

#define vst4q_s32(__a, __b) \
  ({ \
     union { int32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4si (__a, __bu.__o); \
   })

#define vst4q_f32(__a, __b) \
  ({ \
     union { float32x4x4_t __i; __builtin_neon_v4sf4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4sf (__a, __bu.__o); \
   })

#define vst4q_u8(__a, __b) \
  ({ \
     union { uint8x16x4_t __i; __builtin_neon_v16qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v16qi (__a, __bu.__o); \
   })

#define vst4q_u16(__a, __b) \
  ({ \
     union { uint16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8hi (__a, __bu.__o); \
   })

#define vst4q_u32(__a, __b) \
  ({ \
     union { uint32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4si (__a, __bu.__o); \
   })

#define vst4q_p8(__a, __b) \
  ({ \
     union { poly8x16x4_t __i; __builtin_neon_v16qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v16qi (__a, __bu.__o); \
   })

#define vst4q_p16(__a, __b) \
  ({ \
     union { poly16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8hi (__a, __bu.__o); \
   })
/* vst4_lane_*: wrappers around the __builtin_neon_vst4_lane* builtins.
   __b is viewed as the builtin's opaque four-vector type through a
   union and passed on together with __a and __c.  Each statement
   expression is closed with "})".  */

#define vst4_lane_s8(__a, __b, __c) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst4_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4hi (__a, __bu.__o, __c); \
   })

#define vst4_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev2si (__a, __bu.__o, __c); \
   })

#define vst4_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c); \
   })

#define vst4_lane_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst4_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4hi (__a, __bu.__o, __c); \
   })

#define vst4_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev2si (__a, __bu.__o, __c); \
   })

#define vst4_lane_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst4_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4hi (__a, __bu.__o, __c); \
   })
/* vst4q_lane_*: wrappers around the __builtin_neon_vst4_lane* builtins
   for the 16-byte vector modes.  __b is viewed as the builtin's opaque
   four-vector type through a union and passed on together with __a and
   __c.  Each statement expression is closed with "})".  */

#define vst4q_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8hi (__a, __bu.__o, __c); \
   })

#define vst4q_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4si (__a, __bu.__o, __c); \
   })

#define vst4q_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x4x4_t __i; __builtin_neon_v4sf4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c); \
   })

#define vst4q_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8hi (__a, __bu.__o, __c); \
   })

#define vst4q_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4si (__a, __bu.__o, __c); \
   })

#define vst4q_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8hi (__a, __bu.__o, __c); \
   })
/* vand_* and vandq_*: each macro casts the result of the matching
   __builtin_neon_vand<mode> builtin to the public vector type.  The
   trailing constant argument is 1 in the signed variants and 0 in the
   unsigned ones.  The expansion is wrapped in parentheses so the cast
   still covers the whole call when the macro is followed by a postfix
   operator or used as a sizeof operand.  */

#define vand_s8(__a, __b) \
  ((int8x8_t) __builtin_neon_vandv8qi (__a, __b, 1))

#define vand_s16(__a, __b) \
  ((int16x4_t) __builtin_neon_vandv4hi (__a, __b, 1))

#define vand_s32(__a, __b) \
  ((int32x2_t) __builtin_neon_vandv2si (__a, __b, 1))

#define vand_s64(__a, __b) \
  ((int64x1_t) __builtin_neon_vandv1di (__a, __b, 1))

#define vand_u8(__a, __b) \
  ((uint8x8_t) __builtin_neon_vandv8qi (__a, __b, 0))

#define vand_u16(__a, __b) \
  ((uint16x4_t) __builtin_neon_vandv4hi (__a, __b, 0))

#define vand_u32(__a, __b) \
  ((uint32x2_t) __builtin_neon_vandv2si (__a, __b, 0))

#define vand_u64(__a, __b) \
  ((uint64x1_t) __builtin_neon_vandv1di (__a, __b, 0))

#define vandq_s8(__a, __b) \
  ((int8x16_t) __builtin_neon_vandv16qi (__a, __b, 1))

#define vandq_s16(__a, __b) \
  ((int16x8_t) __builtin_neon_vandv8hi (__a, __b, 1))

#define vandq_s32(__a, __b) \
  ((int32x4_t) __builtin_neon_vandv4si (__a, __b, 1))

#define vandq_s64(__a, __b) \
  ((int64x2_t) __builtin_neon_vandv2di (__a, __b, 1))

#define vandq_u8(__a, __b) \
  ((uint8x16_t) __builtin_neon_vandv16qi (__a, __b, 0))

#define vandq_u16(__a, __b) \
  ((uint16x8_t) __builtin_neon_vandv8hi (__a, __b, 0))

#define vandq_u32(__a, __b) \
  ((uint32x4_t) __builtin_neon_vandv4si (__a, __b, 0))

#define vandq_u64(__a, __b) \
  ((uint64x2_t) __builtin_neon_vandv2di (__a, __b, 0))
/* vorr_* and vorrq_*: each macro casts the result of the matching
   __builtin_neon_vorr<mode> builtin to the public vector type.  The
   trailing constant argument is 1 in the signed variants and 0 in the
   unsigned ones.  The expansion is wrapped in parentheses so the cast
   still covers the whole call when the macro is followed by a postfix
   operator or used as a sizeof operand.  */

#define vorr_s8(__a, __b) \
  ((int8x8_t) __builtin_neon_vorrv8qi (__a, __b, 1))

#define vorr_s16(__a, __b) \
  ((int16x4_t) __builtin_neon_vorrv4hi (__a, __b, 1))

#define vorr_s32(__a, __b) \
  ((int32x2_t) __builtin_neon_vorrv2si (__a, __b, 1))

#define vorr_s64(__a, __b) \
  ((int64x1_t) __builtin_neon_vorrv1di (__a, __b, 1))

#define vorr_u8(__a, __b) \
  ((uint8x8_t) __builtin_neon_vorrv8qi (__a, __b, 0))

#define vorr_u16(__a, __b) \
  ((uint16x4_t) __builtin_neon_vorrv4hi (__a, __b, 0))

#define vorr_u32(__a, __b) \
  ((uint32x2_t) __builtin_neon_vorrv2si (__a, __b, 0))

#define vorr_u64(__a, __b) \
  ((uint64x1_t) __builtin_neon_vorrv1di (__a, __b, 0))

#define vorrq_s8(__a, __b) \
  ((int8x16_t) __builtin_neon_vorrv16qi (__a, __b, 1))

#define vorrq_s16(__a, __b) \
  ((int16x8_t) __builtin_neon_vorrv8hi (__a, __b, 1))

#define vorrq_s32(__a, __b) \
  ((int32x4_t) __builtin_neon_vorrv4si (__a, __b, 1))

#define vorrq_s64(__a, __b) \
  ((int64x2_t) __builtin_neon_vorrv2di (__a, __b, 1))

#define vorrq_u8(__a, __b) \
  ((uint8x16_t) __builtin_neon_vorrv16qi (__a, __b, 0))

#define vorrq_u16(__a, __b) \
  ((uint16x8_t) __builtin_neon_vorrv8hi (__a, __b, 0))

#define vorrq_u32(__a, __b) \
  ((uint32x4_t) __builtin_neon_vorrv4si (__a, __b, 0))

#define vorrq_u64(__a, __b) \
  ((uint64x2_t) __builtin_neon_vorrv2di (__a, __b, 0))
/* veor_* and veorq_*: each macro casts the result of the matching
   __builtin_neon_veor<mode> builtin to the public vector type.  The
   trailing constant argument is 1 in the signed variants and 0 in the
   unsigned ones.  The expansion is wrapped in parentheses so the cast
   still covers the whole call when the macro is followed by a postfix
   operator or used as a sizeof operand.  */

#define veor_s8(__a, __b) \
  ((int8x8_t) __builtin_neon_veorv8qi (__a, __b, 1))

#define veor_s16(__a, __b) \
  ((int16x4_t) __builtin_neon_veorv4hi (__a, __b, 1))

#define veor_s32(__a, __b) \
  ((int32x2_t) __builtin_neon_veorv2si (__a, __b, 1))

#define veor_s64(__a, __b) \
  ((int64x1_t) __builtin_neon_veorv1di (__a, __b, 1))

#define veor_u8(__a, __b) \
  ((uint8x8_t) __builtin_neon_veorv8qi (__a, __b, 0))

#define veor_u16(__a, __b) \
  ((uint16x4_t) __builtin_neon_veorv4hi (__a, __b, 0))

#define veor_u32(__a, __b) \
  ((uint32x2_t) __builtin_neon_veorv2si (__a, __b, 0))

#define veor_u64(__a, __b) \
  ((uint64x1_t) __builtin_neon_veorv1di (__a, __b, 0))

#define veorq_s8(__a, __b) \
  ((int8x16_t) __builtin_neon_veorv16qi (__a, __b, 1))

#define veorq_s16(__a, __b) \
  ((int16x8_t) __builtin_neon_veorv8hi (__a, __b, 1))

#define veorq_s32(__a, __b) \
  ((int32x4_t) __builtin_neon_veorv4si (__a, __b, 1))

#define veorq_s64(__a, __b) \
  ((int64x2_t) __builtin_neon_veorv2di (__a, __b, 1))

#define veorq_u8(__a, __b) \
  ((uint8x16_t) __builtin_neon_veorv16qi (__a, __b, 0))

#define veorq_u16(__a, __b) \
  ((uint16x8_t) __builtin_neon_veorv8hi (__a, __b, 0))

#define veorq_u32(__a, __b) \
  ((uint32x4_t) __builtin_neon_veorv4si (__a, __b, 0))

#define veorq_u64(__a, __b) \
  ((uint64x2_t) __builtin_neon_veorv2di (__a, __b, 0))
/* vbic_* and vbicq_*: each macro casts the result of the matching
   __builtin_neon_vbic<mode> builtin to the public vector type.  The
   trailing constant argument is 1 in the signed variants and 0 in the
   unsigned ones.  The expansion is wrapped in parentheses so the cast
   still covers the whole call when the macro is followed by a postfix
   operator or used as a sizeof operand.  */

#define vbic_s8(__a, __b) \
  ((int8x8_t) __builtin_neon_vbicv8qi (__a, __b, 1))

#define vbic_s16(__a, __b) \
  ((int16x4_t) __builtin_neon_vbicv4hi (__a, __b, 1))

#define vbic_s32(__a, __b) \
  ((int32x2_t) __builtin_neon_vbicv2si (__a, __b, 1))

#define vbic_s64(__a, __b) \
  ((int64x1_t) __builtin_neon_vbicv1di (__a, __b, 1))

#define vbic_u8(__a, __b) \
  ((uint8x8_t) __builtin_neon_vbicv8qi (__a, __b, 0))

#define vbic_u16(__a, __b) \
  ((uint16x4_t) __builtin_neon_vbicv4hi (__a, __b, 0))

#define vbic_u32(__a, __b) \
  ((uint32x2_t) __builtin_neon_vbicv2si (__a, __b, 0))

#define vbic_u64(__a, __b) \
  ((uint64x1_t) __builtin_neon_vbicv1di (__a, __b, 0))

#define vbicq_s8(__a, __b) \
  ((int8x16_t) __builtin_neon_vbicv16qi (__a, __b, 1))

#define vbicq_s16(__a, __b) \
  ((int16x8_t) __builtin_neon_vbicv8hi (__a, __b, 1))

#define vbicq_s32(__a, __b) \
  ((int32x4_t) __builtin_neon_vbicv4si (__a, __b, 1))

#define vbicq_s64(__a, __b) \
  ((int64x2_t) __builtin_neon_vbicv2di (__a, __b, 1))

#define vbicq_u8(__a, __b) \
  ((uint8x16_t) __builtin_neon_vbicv16qi (__a, __b, 0))

#define vbicq_u16(__a, __b) \
  ((uint16x8_t) __builtin_neon_vbicv8hi (__a, __b, 0))

#define vbicq_u32(__a, __b) \
  ((uint32x4_t) __builtin_neon_vbicv4si (__a, __b, 0))

#define vbicq_u64(__a, __b) \
  ((uint64x2_t) __builtin_neon_vbicv2di (__a, __b, 0))
/* vorn_* and vornq_*: each macro casts the result of the matching
   __builtin_neon_vorn<mode> builtin to the public vector type.  The
   trailing constant argument is 1 in the signed variants and 0 in the
   unsigned ones.  The expansion is wrapped in parentheses so the cast
   still covers the whole call when the macro is followed by a postfix
   operator or used as a sizeof operand.  */

#define vorn_s8(__a, __b) \
  ((int8x8_t) __builtin_neon_vornv8qi (__a, __b, 1))

#define vorn_s16(__a, __b) \
  ((int16x4_t) __builtin_neon_vornv4hi (__a, __b, 1))

#define vorn_s32(__a, __b) \
  ((int32x2_t) __builtin_neon_vornv2si (__a, __b, 1))

#define vorn_s64(__a, __b) \
  ((int64x1_t) __builtin_neon_vornv1di (__a, __b, 1))

#define vorn_u8(__a, __b) \
  ((uint8x8_t) __builtin_neon_vornv8qi (__a, __b, 0))

#define vorn_u16(__a, __b) \
  ((uint16x4_t) __builtin_neon_vornv4hi (__a, __b, 0))

#define vorn_u32(__a, __b) \
  ((uint32x2_t) __builtin_neon_vornv2si (__a, __b, 0))

#define vorn_u64(__a, __b) \
  ((uint64x1_t) __builtin_neon_vornv1di (__a, __b, 0))

#define vornq_s8(__a, __b) \
  ((int8x16_t) __builtin_neon_vornv16qi (__a, __b, 1))

#define vornq_s16(__a, __b) \
  ((int16x8_t) __builtin_neon_vornv8hi (__a, __b, 1))

#define vornq_s32(__a, __b) \
  ((int32x4_t) __builtin_neon_vornv4si (__a, __b, 1))

#define vornq_s64(__a, __b) \
  ((int64x2_t) __builtin_neon_vornv2di (__a, __b, 1))

#define vornq_u8(__a, __b) \
  ((uint8x16_t) __builtin_neon_vornv16qi (__a, __b, 0))

#define vornq_u16(__a, __b) \
  ((uint16x8_t) __builtin_neon_vornv8hi (__a, __b, 0))

#define vornq_u32(__a, __b) \
  ((uint32x4_t) __builtin_neon_vornv4si (__a, __b, 0))

#define vornq_u64(__a, __b) \
  ((uint64x2_t) __builtin_neon_vornv2di (__a, __b, 0))
/* vreinterpret_p8_*: each macro casts the result of the
   __builtin_neon_vreinterpretv8qi<source-mode> builtin to poly8x8_t.
   The expansion is wrapped in parentheses so the cast still covers the
   whole call when the macro is followed by a postfix operator or used as
   a sizeof operand.  */

#define vreinterpret_p8_s8(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv8qi (__a))

#define vreinterpret_p8_s16(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv4hi (__a))

#define vreinterpret_p8_s32(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv2si (__a))

#define vreinterpret_p8_s64(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv1di (__a))

#define vreinterpret_p8_f32(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv2sf (__a))

#define vreinterpret_p8_u8(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv8qi (__a))

#define vreinterpret_p8_u16(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv4hi (__a))

#define vreinterpret_p8_u32(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv2si (__a))

#define vreinterpret_p8_u64(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv1di (__a))

#define vreinterpret_p8_p16(__a) \
  ((poly8x8_t) __builtin_neon_vreinterpretv8qiv4hi (__a))
/* vreinterpretq_p8_<src>: wrappers that yield poly8x16_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv16qi<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s16, u16 and p16 all use v8hi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_p8_s8(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a))

#define vreinterpretq_p8_s16(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))

#define vreinterpretq_p8_s32(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a))

#define vreinterpretq_p8_s64(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a))

#define vreinterpretq_p8_f32(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a))

#define vreinterpretq_p8_u8(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a))

#define vreinterpretq_p8_u16(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))

#define vreinterpretq_p8_u32(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a))

#define vreinterpretq_p8_u64(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a))

#define vreinterpretq_p8_p16(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))
/* vreinterpret_p16_<src>: wrappers that yield poly16x4_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv4hi<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v8qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_p16_s8(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_p16_s16(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a))

#define vreinterpret_p16_s32(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a))

#define vreinterpret_p16_s64(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a))

#define vreinterpret_p16_f32(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a))

#define vreinterpret_p16_u8(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_p16_u16(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a))

#define vreinterpret_p16_u32(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a))

#define vreinterpret_p16_u64(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a))

#define vreinterpret_p16_p8(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))
/* vreinterpretq_p16_<src>: wrappers that yield poly16x8_t.  Each macro
   hands its single operand to __builtin_neon_vreinterpretv8hi<src-suffix>
   and casts the result; source types whose builtin suffix matches (for
   example s8, u8 and p8 all use v16qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_p16_s8(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_p16_s16(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a))

#define vreinterpretq_p16_s32(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a))

#define vreinterpretq_p16_s64(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a))

#define vreinterpretq_p16_f32(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a))

#define vreinterpretq_p16_u8(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_p16_u16(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a))

#define vreinterpretq_p16_u32(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a))

#define vreinterpretq_p16_u64(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a))

#define vreinterpretq_p16_p8(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))
/* vreinterpret_f32_<src>: wrappers that yield float32x2_t.  Each macro
   hands its single operand to __builtin_neon_vreinterpretv2sf<src-suffix>
   and casts the result; source types whose builtin suffix matches (for
   example s8, u8 and p8 all use v8qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_f32_s8(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a))

#define vreinterpret_f32_s16(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a))

#define vreinterpret_f32_s32(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv2si (__a))

#define vreinterpret_f32_s64(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv1di (__a))

#define vreinterpret_f32_u8(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a))

#define vreinterpret_f32_u16(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a))

#define vreinterpret_f32_u32(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv2si (__a))

#define vreinterpret_f32_u64(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv1di (__a))

#define vreinterpret_f32_p8(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a))

#define vreinterpret_f32_p16(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a))
/* vreinterpretq_f32_<src>: wrappers that yield float32x4_t.  Each macro
   hands its single operand to __builtin_neon_vreinterpretv4sf<src-suffix>
   and casts the result; source types whose builtin suffix matches (for
   example s8, u8 and p8 all use v16qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_f32_s8(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a))

#define vreinterpretq_f32_s16(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a))

#define vreinterpretq_f32_s32(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a))

#define vreinterpretq_f32_s64(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a))

#define vreinterpretq_f32_u8(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a))

#define vreinterpretq_f32_u16(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a))

#define vreinterpretq_f32_u32(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a))

#define vreinterpretq_f32_u64(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a))

#define vreinterpretq_f32_p8(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a))

#define vreinterpretq_f32_p16(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a))
/* vreinterpret_s64_<src>: wrappers that yield int64x1_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv1di<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v8qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_s64_s8(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_s64_s16(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpret_s64_s32(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div2si (__a))

#define vreinterpret_s64_f32(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div2sf (__a))

#define vreinterpret_s64_u8(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_s64_u16(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpret_s64_u32(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div2si (__a))

#define vreinterpret_s64_u64(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div1di (__a))

#define vreinterpret_s64_p8(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_s64_p16(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))
/* vreinterpretq_s64_<src>: wrappers that yield int64x2_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv2di<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v16qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_s64_s8(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_s64_s16(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

#define vreinterpretq_s64_s32(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div4si (__a))

#define vreinterpretq_s64_f32(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a))

#define vreinterpretq_s64_u8(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_s64_u16(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

#define vreinterpretq_s64_u32(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div4si (__a))

#define vreinterpretq_s64_u64(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div2di (__a))

#define vreinterpretq_s64_p8(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_s64_p16(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))
/* vreinterpret_u64_<src>: wrappers that yield uint64x1_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv1di<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v8qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_u64_s8(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_u64_s16(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpret_u64_s32(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div2si (__a))

#define vreinterpret_u64_s64(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div1di (__a))

#define vreinterpret_u64_f32(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div2sf (__a))

#define vreinterpret_u64_u8(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_u64_u16(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpret_u64_u32(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div2si (__a))

#define vreinterpret_u64_p8(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_u64_p16(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))
/* vreinterpretq_u64_<src>: wrappers that yield uint64x2_t.  Each macro
   hands its single operand to __builtin_neon_vreinterpretv2di<src-suffix>
   and casts the result; source types whose builtin suffix matches (for
   example s8, u8 and p8 all use v16qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_u64_s8(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_u64_s16(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

#define vreinterpretq_u64_s32(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a))

#define vreinterpretq_u64_s64(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div2di (__a))

#define vreinterpretq_u64_f32(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a))

#define vreinterpretq_u64_u8(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_u64_u16(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

#define vreinterpretq_u64_u32(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a))

#define vreinterpretq_u64_p8(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_u64_p16(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))
/* vreinterpret_s8_<src>: wrappers that yield int8x8_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv8qi<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s16, u16 and p16 all use v4hi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_s8_s16(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a))

#define vreinterpret_s8_s32(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a))

#define vreinterpret_s8_s64(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv1di (__a))

#define vreinterpret_s8_f32(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a))

#define vreinterpret_s8_u8(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a))

#define vreinterpret_s8_u16(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a))

#define vreinterpret_s8_u32(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a))

#define vreinterpret_s8_u64(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv1di (__a))

#define vreinterpret_s8_p8(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a))

#define vreinterpret_s8_p16(__a) \
  ((int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a))
/* vreinterpretq_s8_<src>: wrappers that yield int8x16_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv16qi<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s16, u16 and p16 all use v8hi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_s8_s16(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))

#define vreinterpretq_s8_s32(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a))

#define vreinterpretq_s8_s64(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a))

#define vreinterpretq_s8_f32(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a))

#define vreinterpretq_s8_u8(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a))

#define vreinterpretq_s8_u16(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))

#define vreinterpretq_s8_u32(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a))

#define vreinterpretq_s8_u64(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a))

#define vreinterpretq_s8_p8(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a))

#define vreinterpretq_s8_p16(__a) \
  ((int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))
/* vreinterpret_s16_<src>: wrappers that yield int16x4_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv4hi<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v8qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_s16_s8(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_s16_s32(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a))

#define vreinterpret_s16_s64(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a))

#define vreinterpret_s16_f32(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a))

#define vreinterpret_s16_u8(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_s16_u16(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a))

#define vreinterpret_s16_u32(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a))

#define vreinterpret_s16_u64(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a))

#define vreinterpret_s16_p8(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_s16_p16(__a) \
  ((int16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a))
/* vreinterpretq_s16_<src>: wrappers that yield int16x8_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv8hi<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v16qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_s16_s8(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_s16_s32(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a))

#define vreinterpretq_s16_s64(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a))

#define vreinterpretq_s16_f32(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a))

#define vreinterpretq_s16_u8(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_s16_u16(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a))

#define vreinterpretq_s16_u32(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a))

#define vreinterpretq_s16_u64(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a))

#define vreinterpretq_s16_p8(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_s16_p16(__a) \
  ((int16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a))
/* vreinterpret_s32_<src>: wrappers that yield int32x2_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv2si<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v8qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_s32_s8(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a))

#define vreinterpret_s32_s16(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a))

#define vreinterpret_s32_s64(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv1di (__a))

#define vreinterpret_s32_f32(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a))

#define vreinterpret_s32_u8(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a))

#define vreinterpret_s32_u16(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a))

#define vreinterpret_s32_u32(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv2si (__a))

#define vreinterpret_s32_u64(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv1di (__a))

#define vreinterpret_s32_p8(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a))

#define vreinterpret_s32_p16(__a) \
  ((int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a))
/* vreinterpretq_s32_<src>: wrappers that yield int32x4_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv4si<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v16qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_s32_s8(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a))

#define vreinterpretq_s32_s16(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a))

#define vreinterpretq_s32_s64(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv2di (__a))

#define vreinterpretq_s32_f32(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a))

#define vreinterpretq_s32_u8(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a))

#define vreinterpretq_s32_u16(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a))

#define vreinterpretq_s32_u32(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv4si (__a))

#define vreinterpretq_s32_u64(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv2di (__a))

#define vreinterpretq_s32_p8(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a))

#define vreinterpretq_s32_p16(__a) \
  ((int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a))
/* vreinterpret_u8_<src>: wrappers that yield uint8x8_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv8qi<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s16, u16 and p16 all use v4hi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_u8_s8(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a))

#define vreinterpret_u8_s16(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a))

#define vreinterpret_u8_s32(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a))

#define vreinterpret_u8_s64(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv1di (__a))

#define vreinterpret_u8_f32(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a))

#define vreinterpret_u8_u16(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a))

#define vreinterpret_u8_u32(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a))

#define vreinterpret_u8_u64(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv1di (__a))

#define vreinterpret_u8_p8(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a))

#define vreinterpret_u8_p16(__a) \
  ((uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a))
/* vreinterpretq_u8_<src>: wrappers that yield uint8x16_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv16qi<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s16, u16 and p16 all use v8hi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_u8_s8(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a))

#define vreinterpretq_u8_s16(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))

#define vreinterpretq_u8_s32(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a))

#define vreinterpretq_u8_s64(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a))

#define vreinterpretq_u8_f32(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a))

#define vreinterpretq_u8_u16(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))

#define vreinterpretq_u8_u32(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a))

#define vreinterpretq_u8_u64(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a))

#define vreinterpretq_u8_p8(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a))

#define vreinterpretq_u8_p16(__a) \
  ((uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))
/* vreinterpret_u16_<src>: wrappers that yield uint16x4_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv4hi<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v8qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_u16_s8(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_u16_s16(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a))

#define vreinterpret_u16_s32(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a))

#define vreinterpret_u16_s64(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a))

#define vreinterpret_u16_f32(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a))

#define vreinterpret_u16_u8(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_u16_u32(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a))

#define vreinterpret_u16_u64(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a))

#define vreinterpret_u16_p8(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_u16_p16(__a) \
  ((uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a))
/* vreinterpretq_u16_<src>: wrappers that yield uint16x8_t.  Each macro
   hands its single operand to __builtin_neon_vreinterpretv8hi<src-suffix>
   and casts the result; source types whose builtin suffix matches (for
   example s8, u8 and p8 all use v16qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_u16_s8(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_u16_s16(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a))

#define vreinterpretq_u16_s32(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a))

#define vreinterpretq_u16_s64(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a))

#define vreinterpretq_u16_f32(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a))

#define vreinterpretq_u16_u8(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_u16_u32(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a))

#define vreinterpretq_u16_u64(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a))

#define vreinterpretq_u16_p8(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_u16_p16(__a) \
  ((uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a))
/* vreinterpret_u32_<src>: wrappers that yield uint32x2_t.  Each macro hands
   its single operand to __builtin_neon_vreinterpretv2si<src-suffix> and
   casts the result; source types whose builtin suffix matches (for example
   s8, u8 and p8 all use v8qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpret_u32_s8(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a))

#define vreinterpret_u32_s16(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a))

#define vreinterpret_u32_s32(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv2si (__a))

#define vreinterpret_u32_s64(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv1di (__a))

#define vreinterpret_u32_f32(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a))

#define vreinterpret_u32_u8(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a))

#define vreinterpret_u32_u16(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a))

#define vreinterpret_u32_u64(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv1di (__a))

#define vreinterpret_u32_p8(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a))

#define vreinterpret_u32_p16(__a) \
  ((uint32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a))
/* vreinterpretq_u32_<src>: wrappers that yield uint32x4_t.  Each macro
   hands its single operand to __builtin_neon_vreinterpretv4si<src-suffix>
   and casts the result; source types whose builtin suffix matches (for
   example s8, u8 and p8 all use v16qi) share one builtin.  The expansion is
   parenthesized as a whole so it stays a single operand next to postfix
   operators or sizeof.  */
#define vreinterpretq_u32_s8(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a))

#define vreinterpretq_u32_s16(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a))

#define vreinterpretq_u32_s32(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv4si (__a))

#define vreinterpretq_u32_s64(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv2di (__a))

#define vreinterpretq_u32_f32(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a))

#define vreinterpretq_u32_u8(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a))

#define vreinterpretq_u32_u16(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a))

#define vreinterpretq_u32_u64(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv2di (__a))

#define vreinterpretq_u32_p8(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a))

#define vreinterpretq_u32_p16(__a) \
  ((uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a))
/* Close the extern "C" { block that the top of this header opens when the
   file is compiled as C++; without the brace the linkage block is left
   unterminated.  */
#ifdef __cplusplus
}
#endif
7200 #endif
7201 #endif