/* LLVM LOCAL file Changed to use preprocessor macros.  */
/* APPLE LOCAL file v7 support. Merge from Codesourcery */
/* ARM NEON intrinsics include file. This file is generated automatically
   using neon-gen.ml.  Please do not edit manually.

   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 2, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the
   Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA.  */
/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */
33 #ifndef _GCC_ARM_NEON_H
34 #define _GCC_ARM_NEON_H 1
37 #error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h
46 typedef __builtin_neon_qi int8x8_t
__attribute__ ((__vector_size__ (8)));
47 typedef __builtin_neon_hi int16x4_t
__attribute__ ((__vector_size__ (8)));
48 typedef __builtin_neon_si int32x2_t
__attribute__ ((__vector_size__ (8)));
49 typedef __builtin_neon_di int64x1_t
__attribute__ ((__vector_size__ (8)));
50 typedef __builtin_neon_sf float32x2_t
__attribute__ ((__vector_size__ (8)));
51 typedef __builtin_neon_poly8 poly8x8_t
__attribute__ ((__vector_size__ (8)));
52 typedef __builtin_neon_poly16 poly16x4_t
__attribute__ ((__vector_size__ (8)));
53 typedef __builtin_neon_uqi uint8x8_t
__attribute__ ((__vector_size__ (8)));
54 typedef __builtin_neon_uhi uint16x4_t
__attribute__ ((__vector_size__ (8)));
55 typedef __builtin_neon_usi uint32x2_t
__attribute__ ((__vector_size__ (8)));
56 typedef __builtin_neon_udi uint64x1_t
__attribute__ ((__vector_size__ (8)));
57 typedef __builtin_neon_qi int8x16_t
__attribute__ ((__vector_size__ (16)));
58 typedef __builtin_neon_hi int16x8_t
__attribute__ ((__vector_size__ (16)));
59 typedef __builtin_neon_si int32x4_t
__attribute__ ((__vector_size__ (16)));
60 typedef __builtin_neon_di int64x2_t
__attribute__ ((__vector_size__ (16)));
61 typedef __builtin_neon_sf float32x4_t
__attribute__ ((__vector_size__ (16)));
62 typedef __builtin_neon_poly8 poly8x16_t
__attribute__ ((__vector_size__ (16)));
63 typedef __builtin_neon_poly16 poly16x8_t
__attribute__ ((__vector_size__ (16)));
64 typedef __builtin_neon_uqi uint8x16_t
__attribute__ ((__vector_size__ (16)));
65 typedef __builtin_neon_uhi uint16x8_t
__attribute__ ((__vector_size__ (16)));
66 typedef __builtin_neon_usi uint32x4_t
__attribute__ ((__vector_size__ (16)));
67 typedef __builtin_neon_udi uint64x2_t
__attribute__ ((__vector_size__ (16)));
69 typedef __builtin_neon_sf float32_t
;
70 typedef __builtin_neon_poly8 poly8_t
;
71 typedef __builtin_neon_poly16 poly16_t
;
/* NOTE(review): the "x2" aggregate types.  Only the opening line of each
   `typedef struct` is present here; the member list and the closing
   declarator are missing (the fused listing numbers jump by five per
   entry), so these declarations are incomplete as they stand.  Restore
   the bodies from the generator output before relying on them.  */
73 typedef struct int8x8x2_t
78 typedef struct int8x16x2_t
83 typedef struct int16x4x2_t
88 typedef struct int16x8x2_t
93 typedef struct int32x2x2_t
98 typedef struct int32x4x2_t
103 typedef struct int64x1x2_t
108 typedef struct int64x2x2_t
113 typedef struct uint8x8x2_t
118 typedef struct uint8x16x2_t
123 typedef struct uint16x4x2_t
128 typedef struct uint16x8x2_t
133 typedef struct uint32x2x2_t
138 typedef struct uint32x4x2_t
143 typedef struct uint64x1x2_t
148 typedef struct uint64x2x2_t
153 typedef struct float32x2x2_t
158 typedef struct float32x4x2_t
163 typedef struct poly8x8x2_t
168 typedef struct poly8x16x2_t
173 typedef struct poly16x4x2_t
178 typedef struct poly16x8x2_t
/* NOTE(review): the "x3" aggregate types.  Only the opening line of each
   `typedef struct` is present here; the member list and the closing
   declarator are missing (the fused listing numbers jump by five per
   entry), so these declarations are incomplete as they stand.  Restore
   the bodies from the generator output before relying on them.  */
183 typedef struct int8x8x3_t
188 typedef struct int8x16x3_t
193 typedef struct int16x4x3_t
198 typedef struct int16x8x3_t
203 typedef struct int32x2x3_t
208 typedef struct int32x4x3_t
213 typedef struct int64x1x3_t
218 typedef struct int64x2x3_t
223 typedef struct uint8x8x3_t
228 typedef struct uint8x16x3_t
233 typedef struct uint16x4x3_t
238 typedef struct uint16x8x3_t
243 typedef struct uint32x2x3_t
248 typedef struct uint32x4x3_t
253 typedef struct uint64x1x3_t
258 typedef struct uint64x2x3_t
263 typedef struct float32x2x3_t
268 typedef struct float32x4x3_t
273 typedef struct poly8x8x3_t
278 typedef struct poly8x16x3_t
283 typedef struct poly16x4x3_t
288 typedef struct poly16x8x3_t
/* NOTE(review): the "x4" aggregate types.  Only the opening line of each
   `typedef struct` is present here; the member list and the closing
   declarator are missing (the fused listing numbers jump by five per
   entry), so these declarations are incomplete as they stand.  Restore
   the bodies from the generator output before relying on them.  */
293 typedef struct int8x8x4_t
298 typedef struct int8x16x4_t
303 typedef struct int16x4x4_t
308 typedef struct int16x8x4_t
313 typedef struct int32x2x4_t
318 typedef struct int32x4x4_t
323 typedef struct int64x1x4_t
328 typedef struct int64x2x4_t
333 typedef struct uint8x8x4_t
338 typedef struct uint8x16x4_t
343 typedef struct uint16x4x4_t
348 typedef struct uint16x8x4_t
353 typedef struct uint32x2x4_t
358 typedef struct uint32x4x4_t
363 typedef struct uint64x1x4_t
368 typedef struct uint64x2x4_t
373 typedef struct float32x2x4_t
378 typedef struct float32x4x4_t
383 typedef struct poly8x8x4_t
388 typedef struct poly8x16x4_t
393 typedef struct poly16x4x4_t
398 typedef struct poly16x8x4_t
/* vadd_<t> / vaddq_<t>: each expands to a call of the matching
   __builtin_neon_vadd<mode> builtin on (__a, __b) and casts the result
   to the vector type named by the suffix.  The trailing constant is 1
   for the _s variants, 0 for _u and 5 for _f32 (presumably a type
   selector read by the builtin -- confirm against the compiler).
   The whole replacement list is parenthesized so the cast cannot be
   separated from the call by a neighbouring operator such as sizeof or
   a postfix subscript.  */
#define vadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1))
#define vadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1))
#define vadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1))
#define vadd_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vaddv1di (__a, __b, 1))
#define vadd_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 5))
#define vadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vaddv8qi (__a, __b, 0))
#define vadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vaddv4hi (__a, __b, 0))
#define vadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vaddv2si (__a, __b, 0))
#define vadd_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vaddv1di (__a, __b, 0))
#define vaddq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1))
#define vaddq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1))
#define vaddq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1))
#define vaddq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1))
#define vaddq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 5))
#define vaddq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vaddv16qi (__a, __b, 0))
#define vaddq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vaddv8hi (__a, __b, 0))
#define vaddq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vaddv4si (__a, __b, 0))
#define vaddq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vaddv2di (__a, __b, 0))
/* vaddl_<t>: expands to __builtin_neon_vaddl<mode> (__a, __b, K) with the
   result cast to a 16-byte vector whose element width is twice that in
   the suffix.  K is 1 for _s and 0 for _u variants (presumably a
   signedness selector -- confirm against the compiler).  The replacement
   list is fully parenthesized so the cast stays attached to the call.  */
#define vaddl_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vaddlv8qi (__a, __b, 1))
#define vaddl_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vaddlv4hi (__a, __b, 1))
#define vaddl_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vaddlv2si (__a, __b, 1))
#define vaddl_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vaddlv8qi (__a, __b, 0))
#define vaddl_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vaddlv4hi (__a, __b, 0))
#define vaddl_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vaddlv2si (__a, __b, 0))
/* vaddw_<t>: expands to __builtin_neon_vaddw<mode> (__a, __b, K) with the
   result cast to a 16-byte vector whose element width is twice that in
   the suffix.  K is 1 for _s and 0 for _u variants (presumably a
   signedness selector -- confirm against the compiler).  The replacement
   list is fully parenthesized so the cast stays attached to the call.  */
#define vaddw_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vaddwv8qi (__a, __b, 1))
#define vaddw_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vaddwv4hi (__a, __b, 1))
#define vaddw_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vaddwv2si (__a, __b, 1))
#define vaddw_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vaddwv8qi (__a, __b, 0))
#define vaddw_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vaddwv4hi (__a, __b, 0))
#define vaddw_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vaddwv2si (__a, __b, 0))
/* vhadd_<t> / vhaddq_<t>: expand to __builtin_neon_vhadd<mode>
   (__a, __b, K) with the result cast to the vector type named by the
   suffix.  K is 1 for _s and 0 for _u variants (presumably a signedness
   selector -- confirm against the compiler).  The replacement list is
   fully parenthesized so the cast stays attached to the call.  */
#define vhadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 1))
#define vhadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 1))
#define vhadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 1))
#define vhadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 0))
#define vhadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 0))
#define vhadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vhaddv2si (__a, __b, 0))
#define vhaddq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 1))
#define vhaddq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 1))
#define vhaddq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 1))
#define vhaddq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 0))
#define vhaddq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 0))
#define vhaddq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vhaddv4si (__a, __b, 0))
/* vrhadd_<t> / vrhaddq_<t>: expand to the same __builtin_neon_vhadd<mode>
   builtins as the vhadd macros, but pass 3 for _s and 2 for _u variants
   as the trailing constant (presumably the signedness selector with a
   rounding flag added -- confirm against the compiler).  The result is
   cast to the vector type named by the suffix, and the replacement list
   is fully parenthesized so the cast stays attached to the call.  */
#define vrhadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 3))
#define vrhadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 3))
#define vrhadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 3))
#define vrhadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 2))
#define vrhadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 2))
#define vrhadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vhaddv2si (__a, __b, 2))
#define vrhaddq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 3))
#define vrhaddq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 3))
#define vrhaddq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 3))
#define vrhaddq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 2))
#define vrhaddq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 2))
#define vrhaddq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vhaddv4si (__a, __b, 2))
/* vqadd_<t> / vqaddq_<t>: expand to __builtin_neon_vqadd<mode>
   (__a, __b, K) with the result cast to the vector type named by the
   suffix.  K is 1 for _s and 0 for _u variants (presumably a signedness
   selector -- confirm against the compiler).  The replacement list is
   fully parenthesized so the cast stays attached to the call.  */
#define vqadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqaddv8qi (__a, __b, 1))
#define vqadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqaddv4hi (__a, __b, 1))
#define vqadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqaddv2si (__a, __b, 1))
#define vqadd_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqaddv1di (__a, __b, 1))
#define vqadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqaddv8qi (__a, __b, 0))
#define vqadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqaddv4hi (__a, __b, 0))
#define vqadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqaddv2si (__a, __b, 0))
#define vqadd_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqaddv1di (__a, __b, 0))
#define vqaddq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqaddv16qi (__a, __b, 1))
#define vqaddq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqaddv8hi (__a, __b, 1))
#define vqaddq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqaddv4si (__a, __b, 1))
#define vqaddq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqaddv2di (__a, __b, 1))
#define vqaddq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqaddv16qi (__a, __b, 0))
#define vqaddq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqaddv8hi (__a, __b, 0))
#define vqaddq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqaddv4si (__a, __b, 0))
#define vqaddq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqaddv2di (__a, __b, 0))
/* vaddhn_<t>: expands to __builtin_neon_vaddhn<mode> (__a, __b, K) with
   the result cast to an 8-byte vector whose element width is half that
   in the suffix.  K is 1 for _s and 0 for _u variants (presumably a
   signedness selector -- confirm against the compiler).  The replacement
   list is fully parenthesized so the cast stays attached to the call.  */
#define vaddhn_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 1))
#define vaddhn_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 1))
#define vaddhn_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 1))
#define vaddhn_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 0))
#define vaddhn_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 0))
#define vaddhn_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 0))
/* vraddhn_<t>: expands to the same __builtin_neon_vaddhn<mode> builtins
   as the vaddhn macros, but passes 3 for _s and 2 for _u variants as the
   trailing constant (presumably the signedness selector with a rounding
   flag added -- confirm against the compiler).  The result is cast to an
   8-byte vector of half-width elements, and the replacement list is
   fully parenthesized so the cast stays attached to the call.  */
#define vraddhn_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 3))
#define vraddhn_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 3))
#define vraddhn_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 3))
#define vraddhn_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 2))
#define vraddhn_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 2))
#define vraddhn_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 2))
/* vmul_<t> / vmulq_<t>: expand to __builtin_neon_vmul<mode> (__a, __b, K)
   with the result cast to the vector type named by the suffix.  K is 1
   for _s, 0 for _u, 5 for _f32 and 4 for _p8 variants (presumably a type
   selector read by the builtin -- confirm against the compiler).  The
   replacement list is fully parenthesized so the cast stays attached to
   the call.  */
#define vmul_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1))
#define vmul_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1))
#define vmul_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1))
#define vmul_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 5))
#define vmul_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vmulv8qi (__a, __b, 0))
#define vmul_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vmulv4hi (__a, __b, 0))
#define vmul_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vmulv2si (__a, __b, 0))
#define vmul_p8(__a, __b) \
  ((poly8x8_t)__builtin_neon_vmulv8qi (__a, __b, 4))
#define vmulq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1))
#define vmulq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1))
#define vmulq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1))
#define vmulq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 5))
#define vmulq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vmulv16qi (__a, __b, 0))
#define vmulq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vmulv8hi (__a, __b, 0))
#define vmulq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmulv4si (__a, __b, 0))
#define vmulq_p8(__a, __b) \
  ((poly8x16_t)__builtin_neon_vmulv16qi (__a, __b, 4))
/* vqdmulh_<t> / vqdmulhq_<t>: expand to __builtin_neon_vqdmulh<mode>
   (__a, __b, 1) with the result cast to the vector type named by the
   suffix.  Only signed variants are defined here.  The replacement list
   is fully parenthesized so the cast stays attached to the call.  */
#define vqdmulh_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 1))
#define vqdmulh_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 1))
#define vqdmulhq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 1))
#define vqdmulhq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 1))
/* vqrdmulh_<t> / vqrdmulhq_<t>: expand to the same
   __builtin_neon_vqdmulh<mode> builtins as the vqdmulh macros, but pass
   3 as the trailing constant (presumably the signed selector with a
   rounding flag added -- confirm against the compiler).  The result is
   cast to the vector type named by the suffix, and the replacement list
   is fully parenthesized so the cast stays attached to the call.  */
#define vqrdmulh_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 3))
#define vqrdmulh_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 3))
#define vqrdmulhq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 3))
#define vqrdmulhq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 3))
/* vmull_<t>: expands to __builtin_neon_vmull<mode> (__a, __b, K) with the
   result cast to a 16-byte vector whose element width is twice that in
   the suffix.  K is 1 for _s, 0 for _u and 4 for _p8 variants
   (presumably a type selector -- confirm against the compiler).  The
   replacement list is fully parenthesized so the cast stays attached to
   the call.  */
#define vmull_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vmullv8qi (__a, __b, 1))
#define vmull_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vmullv4hi (__a, __b, 1))
#define vmull_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vmullv2si (__a, __b, 1))
#define vmull_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vmullv8qi (__a, __b, 0))
#define vmull_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmullv4hi (__a, __b, 0))
#define vmull_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vmullv2si (__a, __b, 0))
#define vmull_p8(__a, __b) \
  ((poly16x8_t)__builtin_neon_vmullv8qi (__a, __b, 4))
/* vqdmull_<t>: expands to __builtin_neon_vqdmull<mode> (__a, __b, 1) with
   the result cast to a 16-byte vector whose element width is twice that
   in the suffix.  Only signed variants are defined here.  The
   replacement list is fully parenthesized so the cast stays attached to
   the call.  */
#define vqdmull_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmullv4hi (__a, __b, 1))
#define vqdmull_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vqdmullv2si (__a, __b, 1))
/* vmla_<t> / vmlaq_<t>: three-operand macros expanding to
   __builtin_neon_vmla<mode> (__a, __b, __c, K) with the result cast to
   the vector type named by the suffix.  K is 1 for _s, 0 for _u and 5
   for _f32 variants (presumably a type selector -- confirm against the
   compiler).  The replacement list is fully parenthesized so the cast
   stays attached to the call.  */
#define vmla_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c, 1))
#define vmla_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c, 1))
#define vmla_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmlav2si (__a, __b, __c, 1))
#define vmla_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmlav2sf (__a, __b, __c, 5))
#define vmla_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c, 0))
#define vmla_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c, 0))
#define vmla_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmlav2si (__a, __b, __c, 0))
#define vmlaq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c, 1))
#define vmlaq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c, 1))
#define vmlaq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlav4si (__a, __b, __c, 1))
#define vmlaq_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmlav4sf (__a, __b, __c, 5))
#define vmlaq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c, 0))
#define vmlaq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c, 0))
#define vmlaq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlav4si (__a, __b, __c, 0))
/* vmlal_<t>: three-operand macros expanding to __builtin_neon_vmlal<mode>
   (__a, __b, __c, K) with the result cast to a 16-byte vector whose
   element width is twice that in the suffix.  K is 1 for _s and 0 for _u
   variants (presumably a signedness selector -- confirm against the
   compiler).  The replacement list is fully parenthesized so the cast
   stays attached to the call.  */
#define vmlal_s8(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmlalv8qi (__a, __b, __c, 1))
#define vmlal_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlalv4hi (__a, __b, __c, 1))
#define vmlal_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmlalv2si (__a, __b, __c, 1))
#define vmlal_u8(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmlalv8qi (__a, __b, __c, 0))
#define vmlal_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlalv4hi (__a, __b, __c, 0))
#define vmlal_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmlalv2si (__a, __b, __c, 0))
/* vqdmlal_<t>: three-operand macros expanding to
   __builtin_neon_vqdmlal<mode> (__a, __b, __c, 1) with the result cast
   to a 16-byte vector whose element width is twice that in the suffix.
   Only signed variants are defined here.  The replacement list is fully
   parenthesized so the cast stays attached to the call.  */
#define vqdmlal_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmlalv4hi (__a, __b, __c, 1))
#define vqdmlal_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmlalv2si (__a, __b, __c, 1))
/* vmls_<t> / vmlsq_<t>: three-operand macros expanding to
   __builtin_neon_vmls<mode> (__a, __b, __c, K) with the result cast to
   the vector type named by the suffix.  K is 1 for _s, 0 for _u and 5
   for _f32 variants (presumably a type selector -- confirm against the
   compiler).  The replacement list is fully parenthesized so the cast
   stays attached to the call.  */
#define vmls_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c, 1))
#define vmls_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c, 1))
#define vmls_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c, 1))
#define vmls_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmlsv2sf (__a, __b, __c, 5))
#define vmls_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c, 0))
#define vmls_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c, 0))
#define vmls_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c, 0))
#define vmlsq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c, 1))
#define vmlsq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c, 1))
#define vmlsq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c, 1))
#define vmlsq_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmlsv4sf (__a, __b, __c, 5))
#define vmlsq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c, 0))
#define vmlsq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c, 0))
#define vmlsq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c, 0))
/* vmlsl_<t>: three-operand macros expanding to __builtin_neon_vmlsl<mode>
   (__a, __b, __c, K) with the result cast to a 16-byte vector whose
   element width is twice that in the suffix.  K is 1 for _s and 0 for _u
   variants (presumably a signedness selector -- confirm against the
   compiler).  The replacement list is fully parenthesized so the cast
   stays attached to the call.  */
#define vmlsl_s8(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmlslv8qi (__a, __b, __c, 1))
#define vmlsl_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlslv4hi (__a, __b, __c, 1))
#define vmlsl_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmlslv2si (__a, __b, __c, 1))
#define vmlsl_u8(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmlslv8qi (__a, __b, __c, 0))
#define vmlsl_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlslv4hi (__a, __b, __c, 0))
#define vmlsl_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmlslv2si (__a, __b, __c, 0))
/* vqdmlsl_<t>: three-operand macros expanding to
   __builtin_neon_vqdmlsl<mode> (__a, __b, __c, 1) with the result cast
   to a 16-byte vector whose element width is twice that in the suffix.
   Only signed variants are defined here.  The replacement list is fully
   parenthesized so the cast stays attached to the call.  */
#define vqdmlsl_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmlslv4hi (__a, __b, __c, 1))
#define vqdmlsl_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1))
/* vsub_<t> / vsubq_<t>: expand to __builtin_neon_vsub<mode> (__a, __b, K)
   with the result cast to the vector type named by the suffix.  K is 1
   for _s, 0 for _u and 5 for _f32 variants (presumably a type selector
   read by the builtin -- confirm against the compiler).  The replacement
   list is fully parenthesized so the cast stays attached to the call.  */
#define vsub_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1))
#define vsub_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1))
#define vsub_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1))
#define vsub_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vsubv1di (__a, __b, 1))
#define vsub_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 5))
#define vsub_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vsubv8qi (__a, __b, 0))
#define vsub_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vsubv4hi (__a, __b, 0))
#define vsub_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vsubv2si (__a, __b, 0))
#define vsub_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vsubv1di (__a, __b, 0))
#define vsubq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1))
#define vsubq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1))
#define vsubq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1))
#define vsubq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1))
#define vsubq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 5))
#define vsubq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vsubv16qi (__a, __b, 0))
#define vsubq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vsubv8hi (__a, __b, 0))
#define vsubq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vsubv4si (__a, __b, 0))
#define vsubq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vsubv2di (__a, __b, 0))
/* vsubl_<t>: expands to __builtin_neon_vsubl<mode> (__a, __b, K) with the
   result cast to a 16-byte vector whose element width is twice that in
   the suffix.  K is 1 for _s and 0 for _u variants (presumably a
   signedness selector -- confirm against the compiler).  The replacement
   list is fully parenthesized so the cast stays attached to the call.  */
#define vsubl_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vsublv8qi (__a, __b, 1))
#define vsubl_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vsublv4hi (__a, __b, 1))
#define vsubl_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vsublv2si (__a, __b, 1))
#define vsubl_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vsublv8qi (__a, __b, 0))
#define vsubl_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vsublv4hi (__a, __b, 0))
#define vsubl_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vsublv2si (__a, __b, 0))
/* vsubw_<t>: expands to __builtin_neon_vsubw<mode> (__a, __b, K) with the
   result cast to a 16-byte vector whose element width is twice that in
   the suffix.  K is 1 for _s and 0 for _u variants (presumably a
   signedness selector -- confirm against the compiler).  The replacement
   list is fully parenthesized so the cast stays attached to the call.  */
#define vsubw_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vsubwv8qi (__a, __b, 1))
#define vsubw_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vsubwv4hi (__a, __b, 1))
#define vsubw_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vsubwv2si (__a, __b, 1))
#define vsubw_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vsubwv8qi (__a, __b, 0))
#define vsubw_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vsubwv4hi (__a, __b, 0))
#define vsubw_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vsubwv2si (__a, __b, 0))
/* vhsub_<t> / vhsubq_<t>: expand to __builtin_neon_vhsub<mode>
   (__a, __b, K) with the result cast to the vector type named by the
   suffix.  K is 1 for _s and 0 for _u variants (presumably a signedness
   selector -- confirm against the compiler).  The replacement list is
   fully parenthesized so the cast stays attached to the call.  */
#define vhsub_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vhsubv8qi (__a, __b, 1))
#define vhsub_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vhsubv4hi (__a, __b, 1))
#define vhsub_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vhsubv2si (__a, __b, 1))
#define vhsub_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vhsubv8qi (__a, __b, 0))
#define vhsub_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vhsubv4hi (__a, __b, 0))
#define vhsub_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vhsubv2si (__a, __b, 0))
#define vhsubq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vhsubv16qi (__a, __b, 1))
#define vhsubq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vhsubv8hi (__a, __b, 1))
#define vhsubq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vhsubv4si (__a, __b, 1))
#define vhsubq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vhsubv16qi (__a, __b, 0))
#define vhsubq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vhsubv8hi (__a, __b, 0))
#define vhsubq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vhsubv4si (__a, __b, 0))
/* vqsub_<t> / vqsubq_<t>: expand to __builtin_neon_vqsub<mode>
   (__a, __b, K) with the result cast to the vector type named by the
   suffix.  K is 1 for _s and 0 for _u variants (presumably a signedness
   selector -- confirm against the compiler).  The replacement list is
   fully parenthesized so the cast stays attached to the call.  */
#define vqsub_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqsubv8qi (__a, __b, 1))
#define vqsub_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqsubv4hi (__a, __b, 1))
#define vqsub_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqsubv2si (__a, __b, 1))
#define vqsub_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqsubv1di (__a, __b, 1))
#define vqsub_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqsubv8qi (__a, __b, 0))
#define vqsub_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqsubv4hi (__a, __b, 0))
#define vqsub_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqsubv2si (__a, __b, 0))
#define vqsub_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqsubv1di (__a, __b, 0))
#define vqsubq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqsubv16qi (__a, __b, 1))
#define vqsubq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqsubv8hi (__a, __b, 1))
#define vqsubq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqsubv4si (__a, __b, 1))
#define vqsubq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqsubv2di (__a, __b, 1))
#define vqsubq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqsubv16qi (__a, __b, 0))
#define vqsubq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqsubv8hi (__a, __b, 0))
#define vqsubq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqsubv4si (__a, __b, 0))
#define vqsubq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqsubv2di (__a, __b, 0))
/* vsubhn_<t>: expands to __builtin_neon_vsubhn<mode> (__a, __b, K) with
   the result cast to an 8-byte vector whose element width is half that
   in the suffix.  K is 1 for _s and 0 for _u variants (presumably a
   signedness selector -- confirm against the compiler).  The replacement
   list is fully parenthesized so the cast stays attached to the call.  */
#define vsubhn_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 1))
#define vsubhn_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 1))
#define vsubhn_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 1))
#define vsubhn_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 0))
#define vsubhn_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 0))
#define vsubhn_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 0))
/* vrsubhn_<t>: expands to the same __builtin_neon_vsubhn<mode> builtins
   as the vsubhn macros, but passes 3 for _s and 2 for _u variants as the
   trailing constant (presumably the signedness selector with a rounding
   flag added -- confirm against the compiler).  The result is cast to an
   8-byte vector of half-width elements, and the replacement list is
   fully parenthesized so the cast stays attached to the call.  */
#define vrsubhn_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 3))
#define vrsubhn_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 3))
#define vrsubhn_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 3))
#define vrsubhn_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 2))
#define vrsubhn_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 2))
#define vrsubhn_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 2))
/* vceq_<t> / vceqq_<t>: expand to __builtin_neon_vceq<mode> (__a, __b, K).
   Whatever the operand suffix, the result is cast to the unsigned
   integer vector type with the same element width and lane count.  K is
   1 for _s, 0 for _u, 5 for _f32 and 4 for _p8 variants (presumably a
   type selector -- confirm against the compiler).  The replacement list
   is fully parenthesized so the cast stays attached to the call.  */
#define vceq_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 1))
#define vceq_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vceqv4hi (__a, __b, 1))
#define vceq_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vceqv2si (__a, __b, 1))
#define vceq_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vceqv2sf (__a, __b, 5))
#define vceq_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 0))
#define vceq_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vceqv4hi (__a, __b, 0))
#define vceq_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vceqv2si (__a, __b, 0))
#define vceq_p8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 4))
#define vceqq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 1))
#define vceqq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vceqv8hi (__a, __b, 1))
#define vceqq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vceqv4si (__a, __b, 1))
#define vceqq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vceqv4sf (__a, __b, 5))
#define vceqq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 0))
#define vceqq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vceqv8hi (__a, __b, 0))
#define vceqq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vceqv4si (__a, __b, 0))
#define vceqq_p8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 4))
/* vcge_<t> / vcgeq_<t>: expand to __builtin_neon_vcge<mode> (__a, __b, K).
   The result is cast to the unsigned integer vector type with the same
   element width and lane count as the operand suffix.  K is 1 for _s,
   0 for _u and 5 for _f32 variants (presumably a type selector --
   confirm against the compiler).  The replacement list is fully
   parenthesized so the cast stays attached to the call.  */
#define vcge_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgev8qi (__a, __b, 1))
#define vcge_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgev4hi (__a, __b, 1))
#define vcge_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2si (__a, __b, 1))
#define vcge_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2sf (__a, __b, 5))
#define vcge_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgev8qi (__a, __b, 0))
#define vcge_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgev4hi (__a, __b, 0))
#define vcge_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2si (__a, __b, 0))
#define vcgeq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgev16qi (__a, __b, 1))
#define vcgeq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgev8hi (__a, __b, 1))
#define vcgeq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4si (__a, __b, 1))
#define vcgeq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4sf (__a, __b, 5))
#define vcgeq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgev16qi (__a, __b, 0))
#define vcgeq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgev8hi (__a, __b, 0))
#define vcgeq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4si (__a, __b, 0))
/* vcle_<t> / vcleq_<t>: implemented through the __builtin_neon_vcge<mode>
   builtins with the two operands exchanged, i.e. vcle (a, b) expands to
   vcge (b, a).  The result is cast to the unsigned integer vector type
   with the same element width and lane count as the operand suffix.
   The trailing constant is 1 for _s, 0 for _u and 5 for _f32 variants
   (presumably a type selector -- confirm against the compiler).  The
   replacement list is fully parenthesized so the cast stays attached to
   the call.  */
#define vcle_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgev8qi (__b, __a, 1))
#define vcle_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgev4hi (__b, __a, 1))
#define vcle_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2si (__b, __a, 1))
#define vcle_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2sf (__b, __a, 5))
#define vcle_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgev8qi (__b, __a, 0))
#define vcle_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgev4hi (__b, __a, 0))
#define vcle_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgev2si (__b, __a, 0))
#define vcleq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgev16qi (__b, __a, 1))
#define vcleq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgev8hi (__b, __a, 1))
#define vcleq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4si (__b, __a, 1))
#define vcleq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4sf (__b, __a, 5))
#define vcleq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgev16qi (__b, __a, 0))
#define vcleq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgev8hi (__b, __a, 0))
#define vcleq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgev4si (__b, __a, 0))
/* vcgt_<t> / vcgtq_<t>: expand to __builtin_neon_vcgt<mode> (__a, __b, K).
   The result is cast to the unsigned integer vector type with the same
   element width and lane count as the operand suffix.  K is 1 for _s,
   0 for _u and 5 for _f32 variants (presumably a type selector --
   confirm against the compiler).  The replacement list is fully
   parenthesized so the cast stays attached to the call.  */
#define vcgt_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b, 1))
#define vcgt_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b, 1))
#define vcgt_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2si (__a, __b, 1))
#define vcgt_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2sf (__a, __b, 5))
#define vcgt_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b, 0))
#define vcgt_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b, 0))
#define vcgt_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2si (__a, __b, 0))
#define vcgtq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b, 1))
#define vcgtq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b, 1))
#define vcgtq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4si (__a, __b, 1))
#define vcgtq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4sf (__a, __b, 5))
#define vcgtq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b, 0))
#define vcgtq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b, 0))
#define vcgtq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4si (__a, __b, 0))
/* Double-register vclt compares.  There is no separate builtin: each macro
   calls the vcgt builtin with the operands exchanged (__b, __a).  Trailing
   selector: 1 for _s, 0 for _u, 5 for _f32.  Expansions are parenthesized
   so the cast binds to the builtin call only.  */
#define vclt_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a, 1))
#define vclt_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a, 1))
#define vclt_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2si (__b, __a, 1))
#define vclt_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2sf (__b, __a, 5))
#define vclt_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a, 0))
#define vclt_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a, 0))
#define vclt_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcgtv2si (__b, __a, 0))
/* Quad-register vcltq compares, written as the 128-bit vcgt builtins with
   the operands exchanged (__b, __a).  Trailing selector: 1 for _s, 0 for
   _u, 5 for _f32.  Expansions are parenthesized so the cast binds to the
   builtin call only.  */
#define vcltq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a, 1))
#define vcltq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a, 1))
#define vcltq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4si (__b, __a, 1))
#define vcltq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4sf (__b, __a, 5))
#define vcltq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a, 0))
#define vcltq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a, 0))
#define vcltq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcgtv4si (__b, __a, 0))
/* Float-only vcage/vcale/vcagt/vcalt compares, in double- and
   quad-register forms.  vcale and vcalt have no builtin of their own; they
   call the vcage and vcagt builtins with the operands exchanged.  All use
   selector 5, the value this header passes for _f32 variants.  Expansions
   are parenthesized so the cast binds to the builtin call only.  */
#define vcage_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcagev2sf (__a, __b, 5))
#define vcageq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcagev4sf (__a, __b, 5))
#define vcale_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcagev2sf (__b, __a, 5))
#define vcaleq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcagev4sf (__b, __a, 5))
#define vcagt_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcagtv2sf (__a, __b, 5))
#define vcagtq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcagtv4sf (__a, __b, 5))
#define vcalt_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcagtv2sf (__b, __a, 5))
#define vcaltq_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcagtv4sf (__b, __a, 5))
/* Double-register vtst.  Each variant forwards both operands to the vtst
   builtin for its element mode and casts the result to the matching
   unsigned vector type.  Trailing selector: 1 for _s, 0 for _u, 4 for _p8.
   Expansions are parenthesized so the cast binds to the builtin call
   only.  */
#define vtst_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 1))
#define vtst_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vtstv4hi (__a, __b, 1))
#define vtst_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vtstv2si (__a, __b, 1))
#define vtst_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 0))
#define vtst_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vtstv4hi (__a, __b, 0))
#define vtst_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vtstv2si (__a, __b, 0))
#define vtst_p8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 4))
/* Quad-register vtstq, using the 128-bit vtst builtins.  Trailing selector:
   1 for _s, 0 for _u, 4 for _p8.  Expansions are parenthesized so the cast
   binds to the builtin call only.  */
#define vtstq_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 1))
#define vtstq_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vtstv8hi (__a, __b, 1))
#define vtstq_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vtstv4si (__a, __b, 1))
#define vtstq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 0))
#define vtstq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vtstv8hi (__a, __b, 0))
#define vtstq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vtstv4si (__a, __b, 0))
#define vtstq_p8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 4))
/* Double-register vabd.  Operands go to the vabd builtin unchanged and the
   result is cast to the same vector type as the inputs' naming suffix.
   Trailing selector: 1 for _s, 0 for _u, 5 for _f32.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vabd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vabdv8qi (__a, __b, 1))
#define vabd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vabdv4hi (__a, __b, 1))
#define vabd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vabdv2si (__a, __b, 1))
#define vabd_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vabdv2sf (__a, __b, 5))
#define vabd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vabdv8qi (__a, __b, 0))
#define vabd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vabdv4hi (__a, __b, 0))
#define vabd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vabdv2si (__a, __b, 0))
/* Quad-register vabdq, using the 128-bit vabd builtins.  Trailing selector:
   1 for _s, 0 for _u, 5 for _f32.  Expansions are parenthesized so the
   cast binds to the builtin call only.  */
#define vabdq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vabdv16qi (__a, __b, 1))
#define vabdq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vabdv8hi (__a, __b, 1))
#define vabdq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vabdv4si (__a, __b, 1))
#define vabdq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vabdv4sf (__a, __b, 5))
#define vabdq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vabdv16qi (__a, __b, 0))
#define vabdq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vabdv8hi (__a, __b, 0))
#define vabdq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vabdv4si (__a, __b, 0))
/* vabdl: the builtins take 64-bit element modes (v8qi, v4hi, v2si) and the
   result is cast to a 128-bit vector whose lanes are twice as wide.
   Trailing selector: 1 for _s, 0 for _u.  Expansions are parenthesized so
   the cast binds to the builtin call only.  */
#define vabdl_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vabdlv8qi (__a, __b, 1))
#define vabdl_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vabdlv4hi (__a, __b, 1))
#define vabdl_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vabdlv2si (__a, __b, 1))
#define vabdl_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vabdlv8qi (__a, __b, 0))
#define vabdl_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vabdlv4hi (__a, __b, 0))
#define vabdl_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vabdlv2si (__a, __b, 0))
/* Double-register vaba.  Three operands are forwarded in order to the vaba
   builtin; integer variants only.  Trailing selector: 1 for _s, 0 for _u.
   Expansions are parenthesized so the cast binds to the builtin call
   only.  */
#define vaba_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vabav8qi (__a, __b, __c, 1))
#define vaba_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vabav4hi (__a, __b, __c, 1))
#define vaba_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vabav2si (__a, __b, __c, 1))
#define vaba_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vabav8qi (__a, __b, __c, 0))
#define vaba_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vabav4hi (__a, __b, __c, 0))
#define vaba_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vabav2si (__a, __b, __c, 0))
/* Quad-register vabaq, using the 128-bit vaba builtins with three operands
   forwarded in order.  Trailing selector: 1 for _s, 0 for _u.  Expansions
   are parenthesized so the cast binds to the builtin call only.  */
#define vabaq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vabav16qi (__a, __b, __c, 1))
#define vabaq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vabav8hi (__a, __b, __c, 1))
#define vabaq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vabav4si (__a, __b, __c, 1))
#define vabaq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vabav16qi (__a, __b, __c, 0))
#define vabaq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vabav8hi (__a, __b, __c, 0))
#define vabaq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vabav4si (__a, __b, __c, 0))
/* vabal: three-operand form whose builtins use 64-bit element modes while
   the result is cast to a 128-bit vector with lanes twice as wide.
   Trailing selector: 1 for _s, 0 for _u.  Expansions are parenthesized so
   the cast binds to the builtin call only.  */
#define vabal_s8(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vabalv8qi (__a, __b, __c, 1))
#define vabal_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vabalv4hi (__a, __b, __c, 1))
#define vabal_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vabalv2si (__a, __b, __c, 1))
#define vabal_u8(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vabalv8qi (__a, __b, __c, 0))
#define vabal_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vabalv4hi (__a, __b, __c, 0))
#define vabal_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vabalv2si (__a, __b, __c, 0))
/* Double-register vmax.  Both operands go to the vmax builtin for the
   element mode; the result keeps the operand vector type.  Trailing
   selector: 1 for _s, 0 for _u, 5 for _f32.  Expansions are parenthesized
   so the cast binds to the builtin call only.  */
#define vmax_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vmaxv8qi (__a, __b, 1))
#define vmax_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vmaxv4hi (__a, __b, 1))
#define vmax_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vmaxv2si (__a, __b, 1))
#define vmax_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vmaxv2sf (__a, __b, 5))
#define vmax_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vmaxv8qi (__a, __b, 0))
#define vmax_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vmaxv4hi (__a, __b, 0))
#define vmax_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vmaxv2si (__a, __b, 0))
/* Quad-register vmaxq, using the 128-bit vmax builtins.  Trailing selector:
   1 for _s, 0 for _u, 5 for _f32.  Expansions are parenthesized so the
   cast binds to the builtin call only.  */
#define vmaxq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vmaxv16qi (__a, __b, 1))
#define vmaxq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vmaxv8hi (__a, __b, 1))
#define vmaxq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vmaxv4si (__a, __b, 1))
#define vmaxq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vmaxv4sf (__a, __b, 5))
#define vmaxq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vmaxv16qi (__a, __b, 0))
#define vmaxq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vmaxv8hi (__a, __b, 0))
#define vmaxq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmaxv4si (__a, __b, 0))
/* Double-register vmin.  Both operands go to the vmin builtin for the
   element mode; the result keeps the operand vector type.  Trailing
   selector: 1 for _s, 0 for _u, 5 for _f32.  Expansions are parenthesized
   so the cast binds to the builtin call only.  */
#define vmin_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vminv8qi (__a, __b, 1))
#define vmin_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vminv4hi (__a, __b, 1))
#define vmin_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vminv2si (__a, __b, 1))
#define vmin_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vminv2sf (__a, __b, 5))
#define vmin_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vminv8qi (__a, __b, 0))
#define vmin_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vminv4hi (__a, __b, 0))
#define vmin_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vminv2si (__a, __b, 0))
/* Quad-register vminq, using the 128-bit vmin builtins.  Trailing selector:
   1 for _s, 0 for _u, 5 for _f32.  Expansions are parenthesized so the
   cast binds to the builtin call only.  */
#define vminq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vminv16qi (__a, __b, 1))
#define vminq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vminv8hi (__a, __b, 1))
#define vminq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vminv4si (__a, __b, 1))
#define vminq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vminv4sf (__a, __b, 5))
#define vminq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vminv16qi (__a, __b, 0))
#define vminq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vminv8hi (__a, __b, 0))
#define vminq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vminv4si (__a, __b, 0))
/* vpadd, double-register forms only.  Both operands go to the vpadd builtin
   for the element mode; the result keeps the operand vector type.
   Trailing selector: 1 for _s, 0 for _u, 5 for _f32.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vpadd_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vpaddv8qi (__a, __b, 1))
#define vpadd_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vpaddv4hi (__a, __b, 1))
#define vpadd_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vpaddv2si (__a, __b, 1))
#define vpadd_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vpaddv2sf (__a, __b, 5))
#define vpadd_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vpaddv8qi (__a, __b, 0))
#define vpadd_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vpaddv4hi (__a, __b, 0))
#define vpadd_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vpaddv2si (__a, __b, 0))
/* Double-register vpaddl.  Single-operand form: the result is cast to a
   64-bit vector with half as many lanes, each twice as wide as the input
   element mode.  Trailing selector: 1 for _s, 0 for _u.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vpaddl_s8(__a) \
  ((int16x4_t)__builtin_neon_vpaddlv8qi (__a, 1))
#define vpaddl_s16(__a) \
  ((int32x2_t)__builtin_neon_vpaddlv4hi (__a, 1))
#define vpaddl_s32(__a) \
  ((int64x1_t)__builtin_neon_vpaddlv2si (__a, 1))
#define vpaddl_u8(__a) \
  ((uint16x4_t)__builtin_neon_vpaddlv8qi (__a, 0))
#define vpaddl_u16(__a) \
  ((uint32x2_t)__builtin_neon_vpaddlv4hi (__a, 0))
#define vpaddl_u32(__a) \
  ((uint64x1_t)__builtin_neon_vpaddlv2si (__a, 0))
/* Quad-register vpaddlq.  Single-operand form over the 128-bit element
   modes; the result is cast to a 128-bit vector with half as many lanes,
   each twice as wide.  Trailing selector: 1 for _s, 0 for _u.  Expansions
   are parenthesized so the cast binds to the builtin call only.  */
#define vpaddlq_s8(__a) \
  ((int16x8_t)__builtin_neon_vpaddlv16qi (__a, 1))
#define vpaddlq_s16(__a) \
  ((int32x4_t)__builtin_neon_vpaddlv8hi (__a, 1))
#define vpaddlq_s32(__a) \
  ((int64x2_t)__builtin_neon_vpaddlv4si (__a, 1))
#define vpaddlq_u8(__a) \
  ((uint16x8_t)__builtin_neon_vpaddlv16qi (__a, 0))
#define vpaddlq_u16(__a) \
  ((uint32x4_t)__builtin_neon_vpaddlv8hi (__a, 0))
#define vpaddlq_u32(__a) \
  ((uint64x2_t)__builtin_neon_vpaddlv4si (__a, 0))
/* Double-register vpadal.  Two operands are forwarded to the vpadal builtin
   named after the narrower element mode; the result is cast to the wider
   64-bit vector type.  Trailing selector: 1 for _s, 0 for _u.  Expansions
   are parenthesized so the cast binds to the builtin call only.  */
#define vpadal_s8(__a, __b) \
  ((int16x4_t)__builtin_neon_vpadalv8qi (__a, __b, 1))
#define vpadal_s16(__a, __b) \
  ((int32x2_t)__builtin_neon_vpadalv4hi (__a, __b, 1))
#define vpadal_s32(__a, __b) \
  ((int64x1_t)__builtin_neon_vpadalv2si (__a, __b, 1))
#define vpadal_u8(__a, __b) \
  ((uint16x4_t)__builtin_neon_vpadalv8qi (__a, __b, 0))
#define vpadal_u16(__a, __b) \
  ((uint32x2_t)__builtin_neon_vpadalv4hi (__a, __b, 0))
#define vpadal_u32(__a, __b) \
  ((uint64x1_t)__builtin_neon_vpadalv2si (__a, __b, 0))
/* Quad-register vpadalq.  Two operands are forwarded to the 128-bit vpadal
   builtin named after the narrower element mode; the result is cast to the
   wider 128-bit vector type.  Trailing selector: 1 for _s, 0 for _u.
   Expansions are parenthesized so the cast binds to the builtin call
   only.  */
#define vpadalq_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vpadalv16qi (__a, __b, 1))
#define vpadalq_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vpadalv8hi (__a, __b, 1))
#define vpadalq_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vpadalv4si (__a, __b, 1))
#define vpadalq_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vpadalv16qi (__a, __b, 0))
#define vpadalq_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vpadalv8hi (__a, __b, 0))
#define vpadalq_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vpadalv4si (__a, __b, 0))
/* vpmax, double-register forms only.  Both operands go to the vpmax builtin
   for the element mode; the result keeps the operand vector type.
   Trailing selector: 1 for _s, 0 for _u, 5 for _f32.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vpmax_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vpmaxv8qi (__a, __b, 1))
#define vpmax_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vpmaxv4hi (__a, __b, 1))
#define vpmax_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vpmaxv2si (__a, __b, 1))
#define vpmax_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vpmaxv2sf (__a, __b, 5))
#define vpmax_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vpmaxv8qi (__a, __b, 0))
#define vpmax_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vpmaxv4hi (__a, __b, 0))
#define vpmax_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vpmaxv2si (__a, __b, 0))
/* vpmin, double-register forms only.  Both operands go to the vpmin builtin
   for the element mode; the result keeps the operand vector type.
   Trailing selector: 1 for _s, 0 for _u, 5 for _f32.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vpmin_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vpminv8qi (__a, __b, 1))
#define vpmin_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vpminv4hi (__a, __b, 1))
#define vpmin_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vpminv2si (__a, __b, 1))
#define vpmin_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vpminv2sf (__a, __b, 5))
#define vpmin_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vpminv8qi (__a, __b, 0))
#define vpmin_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vpminv4hi (__a, __b, 0))
#define vpmin_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vpminv2si (__a, __b, 0))
/* Float-only vrecps and vrsqrts, in double- and quad-register forms.  Both
   operands are forwarded unchanged with selector 5, the value this header
   passes for _f32 variants.  Expansions are parenthesized so the cast
   binds to the builtin call only.  */
#define vrecps_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vrecpsv2sf (__a, __b, 5))
#define vrecpsq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vrecpsv4sf (__a, __b, 5))
#define vrsqrts_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vrsqrtsv2sf (__a, __b, 5))
#define vrsqrtsq_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vrsqrtsv4sf (__a, __b, 5))
/* Double-register vshl taking a vector second operand.  Covers 8- to
   64-bit integer elements (v8qi, v4hi, v2si, v1di).  Trailing selector:
   1 for _s, 0 for _u.  Expansions are parenthesized so the cast binds to
   the builtin call only.  */
#define vshl_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 1))
#define vshl_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 1))
#define vshl_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshlv2si (__a, __b, 1))
#define vshl_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshlv1di (__a, __b, 1))
#define vshl_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshlv8qi (__a, __b, 0))
#define vshl_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshlv4hi (__a, __b, 0))
#define vshl_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshlv2si (__a, __b, 0))
#define vshl_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshlv1di (__a, __b, 0))
/* Quad-register vshlq taking a vector second operand, over the 128-bit
   integer modes (v16qi, v8hi, v4si, v2di).  Trailing selector: 1 for _s,
   0 for _u.  Expansions are parenthesized so the cast binds to the builtin
   call only.  */
#define vshlq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 1))
#define vshlq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 1))
#define vshlq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshlv4si (__a, __b, 1))
#define vshlq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshlv2di (__a, __b, 1))
#define vshlq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshlv16qi (__a, __b, 0))
#define vshlq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshlv8hi (__a, __b, 0))
#define vshlq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshlv4si (__a, __b, 0))
#define vshlq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshlv2di (__a, __b, 0))
/* Double-register vrshl.  These share the vshl builtins with the plain
   vshl macros and differ only in the trailing selector: 3 for _s and 2 for
   _u here, versus 1 and 0 for the non-r forms.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vrshl_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 3))
#define vrshl_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 3))
#define vrshl_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshlv2si (__a, __b, 3))
#define vrshl_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshlv1di (__a, __b, 3))
#define vrshl_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshlv8qi (__a, __b, 2))
#define vrshl_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshlv4hi (__a, __b, 2))
#define vrshl_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshlv2si (__a, __b, 2))
#define vrshl_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshlv1di (__a, __b, 2))
/* Quad-register vrshlq.  Uses the 128-bit vshl builtins; only the trailing
   selector (3 for _s, 2 for _u) separates these from the plain vshlq
   macros.  Expansions are parenthesized so the cast binds to the builtin
   call only.  */
#define vrshlq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 3))
#define vrshlq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 3))
#define vrshlq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshlv4si (__a, __b, 3))
#define vrshlq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshlv2di (__a, __b, 3))
#define vrshlq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshlv16qi (__a, __b, 2))
#define vrshlq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshlv8hi (__a, __b, 2))
#define vrshlq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshlv4si (__a, __b, 2))
#define vrshlq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshlv2di (__a, __b, 2))
/* Double-register vqshl taking a vector second operand, over 8- to 64-bit
   integer elements.  Trailing selector: 1 for _s, 0 for _u.  Expansions
   are parenthesized so the cast binds to the builtin call only.  */
#define vqshl_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 1))
#define vqshl_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 1))
#define vqshl_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 1))
#define vqshl_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqshlv1di (__a, __b, 1))
#define vqshl_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 0))
#define vqshl_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 0))
#define vqshl_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshlv2si (__a, __b, 0))
#define vqshl_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqshlv1di (__a, __b, 0))
/* Quad-register vqshlq taking a vector second operand, over the 128-bit
   integer modes.  Trailing selector: 1 for _s, 0 for _u.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vqshlq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 1))
#define vqshlq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 1))
#define vqshlq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 1))
#define vqshlq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 1))
#define vqshlq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 0))
#define vqshlq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 0))
#define vqshlq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqshlv4si (__a, __b, 0))
#define vqshlq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqshlv2di (__a, __b, 0))
/* Double-register vqrshl.  These share the vqshl builtins with the plain
   vqshl macros and differ only in the trailing selector: 3 for _s and 2
   for _u here, versus 1 and 0 for the non-r forms.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vqrshl_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 3))
#define vqrshl_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 3))
#define vqrshl_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 3))
#define vqrshl_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqshlv1di (__a, __b, 3))
#define vqrshl_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 2))
#define vqrshl_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 2))
#define vqrshl_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshlv2si (__a, __b, 2))
#define vqrshl_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqshlv1di (__a, __b, 2))
/* Quad-register vqrshlq.  Uses the 128-bit vqshl builtins; only the
   trailing selector (3 for _s, 2 for _u) separates these from the plain
   vqshlq macros.  Expansions are parenthesized so the cast binds to the
   builtin call only.  */
#define vqrshlq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 3))
#define vqrshlq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 3))
#define vqrshlq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 3))
#define vqrshlq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 3))
#define vqrshlq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 2))
#define vqrshlq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 2))
#define vqrshlq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqshlv4si (__a, __b, 2))
#define vqrshlq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqshlv2di (__a, __b, 2))
/* Double-register vshr_n.  __a is the vector operand and __b is forwarded
   as the second builtin argument (the _n count; presumably required to be
   a compile-time constant -- confirm against the builtin definition).
   Trailing selector: 1 for _s, 0 for _u.  Expansions are parenthesized so
   the cast binds to the builtin call only.  */
#define vshr_n_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 1))
#define vshr_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 1))
#define vshr_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 1))
#define vshr_n_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 1))
#define vshr_n_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 0))
#define vshr_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 0))
#define vshr_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 0))
#define vshr_n_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 0))
/* Quad-register vshrq_n, using the 128-bit vshr_n builtins.  __b is
   forwarded as the _n count argument.  Trailing selector: 1 for _s, 0 for
   _u.  Expansions are parenthesized so the cast binds to the builtin call
   only.  */
#define vshrq_n_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 1))
#define vshrq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1))
#define vshrq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 1))
#define vshrq_n_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 1))
#define vshrq_n_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 0))
#define vshrq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 0))
#define vshrq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 0))
#define vshrq_n_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 0))
/* Double-register vrshr_n.  These share the vshr_n builtins with the plain
   vshr_n macros and differ only in the trailing selector: 3 for _s and 2
   for _u here, versus 1 and 0 for the non-r forms.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vrshr_n_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 3))
#define vrshr_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 3))
#define vrshr_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 3))
#define vrshr_n_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 3))
#define vrshr_n_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 2))
#define vrshr_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 2))
#define vrshr_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 2))
#define vrshr_n_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 2))
/* Quad-register vrshrq_n.  Uses the 128-bit vshr_n builtins; only the
   trailing selector (3 for _s, 2 for _u) separates these from the plain
   vshrq_n macros.  Expansions are parenthesized so the cast binds to the
   builtin call only.  */
#define vrshrq_n_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 3))
#define vrshrq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 3))
#define vrshrq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 3))
#define vrshrq_n_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 3))
#define vrshrq_n_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 2))
#define vrshrq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 2))
#define vrshrq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 2))
#define vrshrq_n_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 2))
/* vshrn_n: the builtins are named after 128-bit element modes (v8hi, v4si,
   v2di) and the result is cast to a 64-bit vector whose lanes are half as
   wide.  Trailing selector: 1 for _s, 0 for _u.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vshrn_n_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 1))
#define vshrn_n_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 1))
#define vshrn_n_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 1))
#define vshrn_n_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 0))
#define vshrn_n_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 0))
#define vshrn_n_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 0))
/* vrshrn_n.  These share the vshrn_n builtins with the plain vshrn_n
   macros and differ only in the trailing selector: 3 for _s and 2 for _u
   here, versus 1 and 0 for the non-r forms.  Expansions are parenthesized
   so the cast binds to the builtin call only.  */
#define vrshrn_n_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 3))
#define vrshrn_n_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 3))
#define vrshrn_n_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 3))
#define vrshrn_n_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 2))
#define vrshrn_n_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 2))
#define vrshrn_n_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 2))
/* vqshrn_n: builtins named after 128-bit element modes, result cast to a
   64-bit vector with lanes half as wide.  Trailing selector: 1 for _s,
   0 for _u.  Expansions are parenthesized so the cast binds to the builtin
   call only.  */
#define vqshrn_n_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 1))
#define vqshrn_n_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 1))
#define vqshrn_n_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 1))
#define vqshrn_n_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 0))
#define vqshrn_n_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 0))
#define vqshrn_n_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 0))
/* vqrshrn_n.  These share the vqshrn_n builtins with the plain vqshrn_n
   macros and differ only in the trailing selector: 3 for _s and 2 for _u
   here, versus 1 and 0 for the non-r forms.  Expansions are parenthesized
   so the cast binds to the builtin call only.  */
#define vqrshrn_n_s16(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 3))
#define vqrshrn_n_s32(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 3))
#define vqrshrn_n_s64(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 3))
#define vqrshrn_n_u16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 2))
#define vqrshrn_n_u32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 2))
#define vqrshrn_n_u64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 2))
/* vqshrun_n: defined for the _s suffixes only, yet each result is cast to
   an unsigned 64-bit vector with lanes half as wide as the builtin's
   element mode.  The trailing selector is 1 throughout.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vqshrun_n_s16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 1))
#define vqshrun_n_s32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 1))
#define vqshrun_n_s64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 1))
/* vqrshrun_n.  Same vqshrun_n builtins and unsigned result types as the
   plain vqshrun_n macros; only the trailing selector differs (3 here,
   1 for the non-r forms).  Expansions are parenthesized so the cast binds
   to the builtin call only.  */
#define vqrshrun_n_s16(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 3))
#define vqrshrun_n_s32(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 3))
#define vqrshrun_n_s64(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 3))
/* Double-register vshl_n.  __a is the vector operand and __b is forwarded
   as the _n count argument of the vshl_n builtin.  Trailing selector:
   1 for _s, 0 for _u.  Expansions are parenthesized so the cast binds to
   the builtin call only.  */
#define vshl_n_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b, 1))
#define vshl_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b, 1))
#define vshl_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vshl_nv2si (__a, __b, 1))
#define vshl_n_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vshl_nv1di (__a, __b, 1))
#define vshl_n_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vshl_nv8qi (__a, __b, 0))
#define vshl_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vshl_nv4hi (__a, __b, 0))
#define vshl_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vshl_nv2si (__a, __b, 0))
#define vshl_n_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vshl_nv1di (__a, __b, 0))
/* Quad-register vshlq_n, using the 128-bit vshl_n builtins.  __b is
   forwarded as the _n count argument.  Trailing selector: 1 for _s, 0 for
   _u.  Expansions are parenthesized so the cast binds to the builtin call
   only.  */
#define vshlq_n_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b, 1))
#define vshlq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1))
#define vshlq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vshl_nv4si (__a, __b, 1))
#define vshlq_n_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vshl_nv2di (__a, __b, 1))
#define vshlq_n_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vshl_nv16qi (__a, __b, 0))
#define vshlq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 0))
#define vshlq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshl_nv4si (__a, __b, 0))
#define vshlq_n_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshl_nv2di (__a, __b, 0))
/* Double-register vqshl_n.  __b is forwarded as the _n count argument of
   the vqshl_n builtin.  Trailing selector: 1 for _s, 0 for _u.  Expansions
   are parenthesized so the cast binds to the builtin call only.  */
#define vqshl_n_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vqshl_nv8qi (__a, __b, 1))
#define vqshl_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqshl_nv4hi (__a, __b, 1))
#define vqshl_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqshl_nv2si (__a, __b, 1))
#define vqshl_n_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vqshl_nv1di (__a, __b, 1))
#define vqshl_n_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshl_nv8qi (__a, __b, 0))
#define vqshl_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshl_nv4hi (__a, __b, 0))
#define vqshl_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshl_nv2si (__a, __b, 0))
#define vqshl_n_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqshl_nv1di (__a, __b, 0))
/* Quad-register vqshlq_n, using the 128-bit vqshl_n builtins.  __b is
   forwarded as the _n count argument.  Trailing selector: 1 for _s, 0 for
   _u.  Expansions are parenthesized so the cast binds to the builtin call
   only.  */
#define vqshlq_n_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vqshl_nv16qi (__a, __b, 1))
#define vqshlq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqshl_nv8hi (__a, __b, 1))
#define vqshlq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqshl_nv4si (__a, __b, 1))
#define vqshlq_n_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vqshl_nv2di (__a, __b, 1))
#define vqshlq_n_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqshl_nv16qi (__a, __b, 0))
#define vqshlq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqshl_nv8hi (__a, __b, 0))
#define vqshlq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqshl_nv4si (__a, __b, 0))
#define vqshlq_n_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqshl_nv2di (__a, __b, 0))
/* Double-register vqshlu_n: defined for the _s suffixes only, with each
   result cast to the unsigned vector type of the same lane layout.  The
   trailing selector is 1 throughout.  Expansions are parenthesized so the
   cast binds to the builtin call only.  */
#define vqshlu_n_s8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vqshlu_nv8qi (__a, __b, 1))
#define vqshlu_n_s16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vqshlu_nv4hi (__a, __b, 1))
#define vqshlu_n_s32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vqshlu_nv2si (__a, __b, 1))
#define vqshlu_n_s64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vqshlu_nv1di (__a, __b, 1))
/* Quad-register vqshluq_n: defined for the _s suffixes only, with each
   result cast to the unsigned 128-bit vector type of the same lane layout.
   The trailing selector is 1 throughout.  Expansions are parenthesized so
   the cast binds to the builtin call only.  */
#define vqshluq_n_s8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vqshlu_nv16qi (__a, __b, 1))
#define vqshluq_n_s16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vqshlu_nv8hi (__a, __b, 1))
#define vqshluq_n_s32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vqshlu_nv4si (__a, __b, 1))
#define vqshluq_n_s64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vqshlu_nv2di (__a, __b, 1))
/* vshll_n: the builtins take 64-bit element modes (v8qi, v4hi, v2si) and
   the result is cast to a 128-bit vector whose lanes are twice as wide.
   Trailing selector: 1 for _s, 0 for _u.  Expansions are parenthesized so
   the cast binds to the builtin call only.  */
#define vshll_n_s8(__a, __b) \
  ((int16x8_t)__builtin_neon_vshll_nv8qi (__a, __b, 1))
#define vshll_n_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vshll_nv4hi (__a, __b, 1))
#define vshll_n_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vshll_nv2si (__a, __b, 1))
#define vshll_n_u8(__a, __b) \
  ((uint16x8_t)__builtin_neon_vshll_nv8qi (__a, __b, 0))
#define vshll_n_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vshll_nv4hi (__a, __b, 0))
#define vshll_n_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vshll_nv2si (__a, __b, 0))
/* Double-register vsra_n.  Three operands (__a, __b, __c) are forwarded in
   order to the vsra_n builtin; __c is the _n count argument.  Trailing
   selector: 1 for _s, 0 for _u.  Expansions are parenthesized so the cast
   binds to the builtin call only.  */
#define vsra_n_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 1))
#define vsra_n_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 1))
#define vsra_n_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 1))
#define vsra_n_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 1))
#define vsra_n_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 0))
#define vsra_n_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 0))
#define vsra_n_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 0))
#define vsra_n_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 0))
/* Quad-register vsraq_n, using the 128-bit vsra_n builtins with three
   operands forwarded in order; __c is the _n count argument.  Trailing
   selector: 1 for _s, 0 for _u.  Expansions are parenthesized so the cast
   binds to the builtin call only.  */
#define vsraq_n_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 1))
#define vsraq_n_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 1))
#define vsraq_n_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 1))
#define vsraq_n_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 1))
#define vsraq_n_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 0))
#define vsraq_n_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 0))
#define vsraq_n_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 0))
#define vsraq_n_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 0))
/* Double-register vrsra_n.  These share the vsra_n builtins with the plain
   vsra_n macros and differ only in the trailing selector: 3 for _s and 2
   for _u here, versus 1 and 0 for the non-r forms.  Expansions are
   parenthesized so the cast binds to the builtin call only.  */
#define vrsra_n_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 3))
#define vrsra_n_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 3))
#define vrsra_n_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 3))
#define vrsra_n_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 3))
#define vrsra_n_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 2))
#define vrsra_n_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 2))
#define vrsra_n_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 2))
#define vrsra_n_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 2))
/* Quad-register vrsraq_n.  Uses the 128-bit vsra_n builtins; only the
   trailing selector (3 for _s, 2 for _u) separates these from the plain
   vsraq_n macros.  Expansions are parenthesized so the cast binds to the
   builtin call only.  */
#define vrsraq_n_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 3))
#define vrsraq_n_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 3))
#define vrsraq_n_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 3))
#define vrsraq_n_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 3))
#define vrsraq_n_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 2))
#define vrsraq_n_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 2))
#define vrsraq_n_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 2))
#define vrsraq_n_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 2))
/* vsri_n_*: forward (__a, __b, __c) unchanged to __builtin_neon_vsri_n<mode>
   and cast the result to the vector type named by the macro.  Signed,
   unsigned and polynomial variants of one element width share a builtin.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vsri_n_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c))
#define vsri_n_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c))
#define vsri_n_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c))
#define vsri_n_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vsri_nv1di (__a, __b, __c))
#define vsri_n_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c))
#define vsri_n_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c))
#define vsri_n_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c))
#define vsri_n_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vsri_nv1di (__a, __b, __c))
#define vsri_n_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c))
#define vsri_n_p16(__a, __b, __c) \
  ((poly16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c))
/* vsriq_n_*: forward (__a, __b, __c) unchanged to __builtin_neon_vsri_n<mode>
   and cast the result to the vector type named by the macro.  The expansion
   is fully parenthesized so it is safe under sizeof and postfix operators.  */
#define vsriq_n_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c))
#define vsriq_n_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c))
#define vsriq_n_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c))
#define vsriq_n_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c))
#define vsriq_n_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c))
#define vsriq_n_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c))
#define vsriq_n_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c))
#define vsriq_n_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c))
#define vsriq_n_p8(__a, __b, __c) \
  ((poly8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c))
#define vsriq_n_p16(__a, __b, __c) \
  ((poly16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c))
/* vsli_n_*: forward (__a, __b, __c) unchanged to __builtin_neon_vsli_n<mode>
   and cast the result to the vector type named by the macro.  The expansion
   is fully parenthesized so it is safe under sizeof and postfix operators.  */
#define vsli_n_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c))
#define vsli_n_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c))
#define vsli_n_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c))
#define vsli_n_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vsli_nv1di (__a, __b, __c))
#define vsli_n_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c))
#define vsli_n_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c))
#define vsli_n_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c))
#define vsli_n_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vsli_nv1di (__a, __b, __c))
#define vsli_n_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c))
#define vsli_n_p16(__a, __b, __c) \
  ((poly16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c))
/* vsliq_n_*: forward (__a, __b, __c) unchanged to __builtin_neon_vsli_n<mode>
   and cast the result to the vector type named by the macro.  The expansion
   is fully parenthesized so it is safe under sizeof and postfix operators.  */
#define vsliq_n_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c))
#define vsliq_n_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c))
#define vsliq_n_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c))
#define vsliq_n_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c))
#define vsliq_n_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c))
#define vsliq_n_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c))
#define vsliq_n_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c))
#define vsliq_n_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c))
#define vsliq_n_p8(__a, __b, __c) \
  ((poly8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c))
#define vsliq_n_p16(__a, __b, __c) \
  ((poly16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c))
/* vabs_* / vabsq_*: one-operand wrappers over __builtin_neon_vabs<mode>.
   Integer variants pass 1 as the trailing constant, f32 variants pass 5.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vabs_s8(__a) \
  ((int8x8_t)__builtin_neon_vabsv8qi (__a, 1))
#define vabs_s16(__a) \
  ((int16x4_t)__builtin_neon_vabsv4hi (__a, 1))
#define vabs_s32(__a) \
  ((int32x2_t)__builtin_neon_vabsv2si (__a, 1))
#define vabs_f32(__a) \
  ((float32x2_t)__builtin_neon_vabsv2sf (__a, 5))
#define vabsq_s8(__a) \
  ((int8x16_t)__builtin_neon_vabsv16qi (__a, 1))
#define vabsq_s16(__a) \
  ((int16x8_t)__builtin_neon_vabsv8hi (__a, 1))
#define vabsq_s32(__a) \
  ((int32x4_t)__builtin_neon_vabsv4si (__a, 1))
#define vabsq_f32(__a) \
  ((float32x4_t)__builtin_neon_vabsv4sf (__a, 5))
/* vqabs_* / vqabsq_*: one-operand wrappers over __builtin_neon_vqabs<mode>,
   always passing 1 as the trailing constant.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vqabs_s8(__a) \
  ((int8x8_t)__builtin_neon_vqabsv8qi (__a, 1))
#define vqabs_s16(__a) \
  ((int16x4_t)__builtin_neon_vqabsv4hi (__a, 1))
#define vqabs_s32(__a) \
  ((int32x2_t)__builtin_neon_vqabsv2si (__a, 1))
#define vqabsq_s8(__a) \
  ((int8x16_t)__builtin_neon_vqabsv16qi (__a, 1))
#define vqabsq_s16(__a) \
  ((int16x8_t)__builtin_neon_vqabsv8hi (__a, 1))
#define vqabsq_s32(__a) \
  ((int32x4_t)__builtin_neon_vqabsv4si (__a, 1))
/* vneg_* / vnegq_*: one-operand wrappers over __builtin_neon_vneg<mode>.
   Integer variants pass 1 as the trailing constant, f32 variants pass 5.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vneg_s8(__a) \
  ((int8x8_t)__builtin_neon_vnegv8qi (__a, 1))
#define vneg_s16(__a) \
  ((int16x4_t)__builtin_neon_vnegv4hi (__a, 1))
#define vneg_s32(__a) \
  ((int32x2_t)__builtin_neon_vnegv2si (__a, 1))
#define vneg_f32(__a) \
  ((float32x2_t)__builtin_neon_vnegv2sf (__a, 5))
#define vnegq_s8(__a) \
  ((int8x16_t)__builtin_neon_vnegv16qi (__a, 1))
#define vnegq_s16(__a) \
  ((int16x8_t)__builtin_neon_vnegv8hi (__a, 1))
#define vnegq_s32(__a) \
  ((int32x4_t)__builtin_neon_vnegv4si (__a, 1))
#define vnegq_f32(__a) \
  ((float32x4_t)__builtin_neon_vnegv4sf (__a, 5))
/* vqneg_* / vqnegq_*: one-operand wrappers over __builtin_neon_vqneg<mode>,
   always passing 1 as the trailing constant.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vqneg_s8(__a) \
  ((int8x8_t)__builtin_neon_vqnegv8qi (__a, 1))
#define vqneg_s16(__a) \
  ((int16x4_t)__builtin_neon_vqnegv4hi (__a, 1))
#define vqneg_s32(__a) \
  ((int32x2_t)__builtin_neon_vqnegv2si (__a, 1))
#define vqnegq_s8(__a) \
  ((int8x16_t)__builtin_neon_vqnegv16qi (__a, 1))
#define vqnegq_s16(__a) \
  ((int16x8_t)__builtin_neon_vqnegv8hi (__a, 1))
#define vqnegq_s32(__a) \
  ((int32x4_t)__builtin_neon_vqnegv4si (__a, 1))
/* vmvn_*: one-operand wrappers over __builtin_neon_vmvn<mode>.  The trailing
   constant is 1 for signed, 0 for unsigned and 4 for polynomial variants.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vmvn_s8(__a) \
  ((int8x8_t)__builtin_neon_vmvnv8qi (__a, 1))
#define vmvn_s16(__a) \
  ((int16x4_t)__builtin_neon_vmvnv4hi (__a, 1))
#define vmvn_s32(__a) \
  ((int32x2_t)__builtin_neon_vmvnv2si (__a, 1))
#define vmvn_u8(__a) \
  ((uint8x8_t)__builtin_neon_vmvnv8qi (__a, 0))
#define vmvn_u16(__a) \
  ((uint16x4_t)__builtin_neon_vmvnv4hi (__a, 0))
#define vmvn_u32(__a) \
  ((uint32x2_t)__builtin_neon_vmvnv2si (__a, 0))
#define vmvn_p8(__a) \
  ((poly8x8_t)__builtin_neon_vmvnv8qi (__a, 4))
/* vmvnq_*: one-operand wrappers over __builtin_neon_vmvn<mode>.  The trailing
   constant is 1 for signed, 0 for unsigned and 4 for polynomial variants.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vmvnq_s8(__a) \
  ((int8x16_t)__builtin_neon_vmvnv16qi (__a, 1))
#define vmvnq_s16(__a) \
  ((int16x8_t)__builtin_neon_vmvnv8hi (__a, 1))
#define vmvnq_s32(__a) \
  ((int32x4_t)__builtin_neon_vmvnv4si (__a, 1))
#define vmvnq_u8(__a) \
  ((uint8x16_t)__builtin_neon_vmvnv16qi (__a, 0))
#define vmvnq_u16(__a) \
  ((uint16x8_t)__builtin_neon_vmvnv8hi (__a, 0))
#define vmvnq_u32(__a) \
  ((uint32x4_t)__builtin_neon_vmvnv4si (__a, 0))
#define vmvnq_p8(__a) \
  ((poly8x16_t)__builtin_neon_vmvnv16qi (__a, 4))
/* vcls_* / vclsq_*: one-operand wrappers over __builtin_neon_vcls<mode>,
   always passing 1 as the trailing constant.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vcls_s8(__a) \
  ((int8x8_t)__builtin_neon_vclsv8qi (__a, 1))
#define vcls_s16(__a) \
  ((int16x4_t)__builtin_neon_vclsv4hi (__a, 1))
#define vcls_s32(__a) \
  ((int32x2_t)__builtin_neon_vclsv2si (__a, 1))
#define vclsq_s8(__a) \
  ((int8x16_t)__builtin_neon_vclsv16qi (__a, 1))
#define vclsq_s16(__a) \
  ((int16x8_t)__builtin_neon_vclsv8hi (__a, 1))
#define vclsq_s32(__a) \
  ((int32x4_t)__builtin_neon_vclsv4si (__a, 1))
/* vclz_*: one-operand wrappers over __builtin_neon_vclz<mode>.  Signed
   variants pass 1 as the trailing constant, unsigned variants pass 0.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vclz_s8(__a) \
  ((int8x8_t)__builtin_neon_vclzv8qi (__a, 1))
#define vclz_s16(__a) \
  ((int16x4_t)__builtin_neon_vclzv4hi (__a, 1))
#define vclz_s32(__a) \
  ((int32x2_t)__builtin_neon_vclzv2si (__a, 1))
#define vclz_u8(__a) \
  ((uint8x8_t)__builtin_neon_vclzv8qi (__a, 0))
#define vclz_u16(__a) \
  ((uint16x4_t)__builtin_neon_vclzv4hi (__a, 0))
#define vclz_u32(__a) \
  ((uint32x2_t)__builtin_neon_vclzv2si (__a, 0))
/* vclzq_*: one-operand wrappers over __builtin_neon_vclz<mode>.  Signed
   variants pass 1 as the trailing constant, unsigned variants pass 0.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vclzq_s8(__a) \
  ((int8x16_t)__builtin_neon_vclzv16qi (__a, 1))
#define vclzq_s16(__a) \
  ((int16x8_t)__builtin_neon_vclzv8hi (__a, 1))
#define vclzq_s32(__a) \
  ((int32x4_t)__builtin_neon_vclzv4si (__a, 1))
#define vclzq_u8(__a) \
  ((uint8x16_t)__builtin_neon_vclzv16qi (__a, 0))
#define vclzq_u16(__a) \
  ((uint16x8_t)__builtin_neon_vclzv8hi (__a, 0))
#define vclzq_u32(__a) \
  ((uint32x4_t)__builtin_neon_vclzv4si (__a, 0))
/* vcnt_* / vcntq_*: one-operand wrappers over __builtin_neon_vcnt<mode>.
   The trailing constant is 1 for signed, 0 for unsigned and 4 for
   polynomial variants.  The expansion is fully parenthesized so it is safe
   under sizeof and postfix operators.  */
#define vcnt_s8(__a) \
  ((int8x8_t)__builtin_neon_vcntv8qi (__a, 1))
#define vcnt_u8(__a) \
  ((uint8x8_t)__builtin_neon_vcntv8qi (__a, 0))
#define vcnt_p8(__a) \
  ((poly8x8_t)__builtin_neon_vcntv8qi (__a, 4))
#define vcntq_s8(__a) \
  ((int8x16_t)__builtin_neon_vcntv16qi (__a, 1))
#define vcntq_u8(__a) \
  ((uint8x16_t)__builtin_neon_vcntv16qi (__a, 0))
#define vcntq_p8(__a) \
  ((poly8x16_t)__builtin_neon_vcntv16qi (__a, 4))
/* vrecpe_* / vrecpeq_*: one-operand wrappers over __builtin_neon_vrecpe<mode>.
   f32 variants pass 5 as the trailing constant, u32 variants pass 0.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vrecpe_f32(__a) \
  ((float32x2_t)__builtin_neon_vrecpev2sf (__a, 5))
#define vrecpe_u32(__a) \
  ((uint32x2_t)__builtin_neon_vrecpev2si (__a, 0))
#define vrecpeq_f32(__a) \
  ((float32x4_t)__builtin_neon_vrecpev4sf (__a, 5))
#define vrecpeq_u32(__a) \
  ((uint32x4_t)__builtin_neon_vrecpev4si (__a, 0))
/* vrsqrte_* / vrsqrteq_*: one-operand wrappers over
   __builtin_neon_vrsqrte<mode>.  f32 variants pass 5 as the trailing
   constant, u32 variants pass 0.  The expansion is fully parenthesized so it
   is safe under sizeof and postfix operators.  */
#define vrsqrte_f32(__a) \
  ((float32x2_t)__builtin_neon_vrsqrtev2sf (__a, 5))
#define vrsqrte_u32(__a) \
  ((uint32x2_t)__builtin_neon_vrsqrtev2si (__a, 0))
#define vrsqrteq_f32(__a) \
  ((float32x4_t)__builtin_neon_vrsqrtev4sf (__a, 5))
#define vrsqrteq_u32(__a) \
  ((uint32x4_t)__builtin_neon_vrsqrtev4si (__a, 0))
/* vget_lane_*: pass vector __a and lane index __b to
   __builtin_neon_vget_lane<mode> and cast the result to the scalar type
   named by the macro.  The trailing constant is 1 for signed, 0 for
   unsigned, 4 for polynomial and 5 for f32 variants.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vget_lane_s8(__a, __b) \
  ((int8_t)__builtin_neon_vget_lanev8qi (__a, __b, 1))
#define vget_lane_s16(__a, __b) \
  ((int16_t)__builtin_neon_vget_lanev4hi (__a, __b, 1))
#define vget_lane_s32(__a, __b) \
  ((int32_t)__builtin_neon_vget_lanev2si (__a, __b, 1))
#define vget_lane_f32(__a, __b) \
  ((float32_t)__builtin_neon_vget_lanev2sf (__a, __b, 5))
#define vget_lane_u8(__a, __b) \
  ((uint8_t)__builtin_neon_vget_lanev8qi (__a, __b, 0))
#define vget_lane_u16(__a, __b) \
  ((uint16_t)__builtin_neon_vget_lanev4hi (__a, __b, 0))
#define vget_lane_u32(__a, __b) \
  ((uint32_t)__builtin_neon_vget_lanev2si (__a, __b, 0))
#define vget_lane_p8(__a, __b) \
  ((poly8_t)__builtin_neon_vget_lanev8qi (__a, __b, 4))
#define vget_lane_p16(__a, __b) \
  ((poly16_t)__builtin_neon_vget_lanev4hi (__a, __b, 4))
#define vget_lane_s64(__a, __b) \
  ((int64_t)__builtin_neon_vget_lanev1di (__a, __b, 1))
#define vget_lane_u64(__a, __b) \
  ((uint64_t)__builtin_neon_vget_lanev1di (__a, __b, 0))
/* vgetq_lane_*: pass vector __a and lane index __b to
   __builtin_neon_vget_lane<mode> and cast the result to the scalar type
   named by the macro.  The trailing constant is 1 for signed, 0 for
   unsigned, 4 for polynomial and 5 for f32 variants.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vgetq_lane_s8(__a, __b) \
  ((int8_t)__builtin_neon_vget_lanev16qi (__a, __b, 1))
#define vgetq_lane_s16(__a, __b) \
  ((int16_t)__builtin_neon_vget_lanev8hi (__a, __b, 1))
#define vgetq_lane_s32(__a, __b) \
  ((int32_t)__builtin_neon_vget_lanev4si (__a, __b, 1))
#define vgetq_lane_f32(__a, __b) \
  ((float32_t)__builtin_neon_vget_lanev4sf (__a, __b, 5))
#define vgetq_lane_u8(__a, __b) \
  ((uint8_t)__builtin_neon_vget_lanev16qi (__a, __b, 0))
#define vgetq_lane_u16(__a, __b) \
  ((uint16_t)__builtin_neon_vget_lanev8hi (__a, __b, 0))
#define vgetq_lane_u32(__a, __b) \
  ((uint32_t)__builtin_neon_vget_lanev4si (__a, __b, 0))
#define vgetq_lane_p8(__a, __b) \
  ((poly8_t)__builtin_neon_vget_lanev16qi (__a, __b, 4))
#define vgetq_lane_p16(__a, __b) \
  ((poly16_t)__builtin_neon_vget_lanev8hi (__a, __b, 4))
#define vgetq_lane_s64(__a, __b) \
  ((int64_t)__builtin_neon_vget_lanev2di (__a, __b, 1))
#define vgetq_lane_u64(__a, __b) \
  ((uint64_t)__builtin_neon_vget_lanev2di (__a, __b, 0))
/* vset_lane_*: forward (__a, __b, __c) unchanged to
   __builtin_neon_vset_lane<mode> and cast the result to the vector type
   named by the macro.  The expansion is fully parenthesized so it is safe
   under sizeof and postfix operators.  */
#define vset_lane_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vset_lanev8qi (__a, __b, __c))
#define vset_lane_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vset_lanev4hi (__a, __b, __c))
#define vset_lane_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vset_lanev2si (__a, __b, __c))
#define vset_lane_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c))
#define vset_lane_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vset_lanev8qi (__a, __b, __c))
#define vset_lane_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vset_lanev4hi (__a, __b, __c))
#define vset_lane_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vset_lanev2si (__a, __b, __c))
#define vset_lane_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vset_lanev8qi (__a, __b, __c))
#define vset_lane_p16(__a, __b, __c) \
  ((poly16x4_t)__builtin_neon_vset_lanev4hi (__a, __b, __c))
#define vset_lane_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vset_lanev1di (__a, __b, __c))
#define vset_lane_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vset_lanev1di (__a, __b, __c))
/* vsetq_lane_*: forward (__a, __b, __c) unchanged to
   __builtin_neon_vset_lane<mode> and cast the result to the vector type
   named by the macro.  The expansion is fully parenthesized so it is safe
   under sizeof and postfix operators.  */
#define vsetq_lane_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vset_lanev16qi (__a, __b, __c))
#define vsetq_lane_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vset_lanev8hi (__a, __b, __c))
#define vsetq_lane_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vset_lanev4si (__a, __b, __c))
#define vsetq_lane_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c))
#define vsetq_lane_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vset_lanev16qi (__a, __b, __c))
#define vsetq_lane_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vset_lanev8hi (__a, __b, __c))
#define vsetq_lane_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vset_lanev4si (__a, __b, __c))
#define vsetq_lane_p8(__a, __b, __c) \
  ((poly8x16_t)__builtin_neon_vset_lanev16qi (__a, __b, __c))
#define vsetq_lane_p16(__a, __b, __c) \
  ((poly16x8_t)__builtin_neon_vset_lanev8hi (__a, __b, __c))
#define vsetq_lane_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vset_lanev2di (__a, __b, __c))
#define vsetq_lane_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vset_lanev2di (__a, __b, __c))
/* vcreate_*: pass __a to __builtin_neon_vcreate<mode> and cast the result to
   the vector type named by the macro.  The expansion is fully parenthesized
   so it is safe under sizeof and postfix operators.  */
#define vcreate_s8(__a) \
  ((int8x8_t)__builtin_neon_vcreatev8qi (__a))
#define vcreate_s16(__a) \
  ((int16x4_t)__builtin_neon_vcreatev4hi (__a))
#define vcreate_s32(__a) \
  ((int32x2_t)__builtin_neon_vcreatev2si (__a))
#define vcreate_s64(__a) \
  ((int64x1_t)__builtin_neon_vcreatev1di (__a))
#define vcreate_f32(__a) \
  ((float32x2_t)__builtin_neon_vcreatev2sf (__a))
#define vcreate_u8(__a) \
  ((uint8x8_t)__builtin_neon_vcreatev8qi (__a))
#define vcreate_u16(__a) \
  ((uint16x4_t)__builtin_neon_vcreatev4hi (__a))
#define vcreate_u32(__a) \
  ((uint32x2_t)__builtin_neon_vcreatev2si (__a))
#define vcreate_u64(__a) \
  ((uint64x1_t)__builtin_neon_vcreatev1di (__a))
#define vcreate_p8(__a) \
  ((poly8x8_t)__builtin_neon_vcreatev8qi (__a))
#define vcreate_p16(__a) \
  ((poly16x4_t)__builtin_neon_vcreatev4hi (__a))
/* vdup_n_*: pass scalar __a to __builtin_neon_vdup_n<mode> and cast the
   result to the vector type named by the macro.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vdup_n_s8(__a) \
  ((int8x8_t)__builtin_neon_vdup_nv8qi (__a))
#define vdup_n_s16(__a) \
  ((int16x4_t)__builtin_neon_vdup_nv4hi (__a))
#define vdup_n_s32(__a) \
  ((int32x2_t)__builtin_neon_vdup_nv2si (__a))
#define vdup_n_f32(__a) \
  ((float32x2_t)__builtin_neon_vdup_nv2sf (__a))
#define vdup_n_u8(__a) \
  ((uint8x8_t)__builtin_neon_vdup_nv8qi (__a))
#define vdup_n_u16(__a) \
  ((uint16x4_t)__builtin_neon_vdup_nv4hi (__a))
#define vdup_n_u32(__a) \
  ((uint32x2_t)__builtin_neon_vdup_nv2si (__a))
#define vdup_n_p8(__a) \
  ((poly8x8_t)__builtin_neon_vdup_nv8qi (__a))
#define vdup_n_p16(__a) \
  ((poly16x4_t)__builtin_neon_vdup_nv4hi (__a))
#define vdup_n_s64(__a) \
  ((int64x1_t)__builtin_neon_vdup_nv1di (__a))
#define vdup_n_u64(__a) \
  ((uint64x1_t)__builtin_neon_vdup_nv1di (__a))
/* vdupq_n_*: pass scalar __a to __builtin_neon_vdup_n<mode> and cast the
   result to the vector type named by the macro.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vdupq_n_s8(__a) \
  ((int8x16_t)__builtin_neon_vdup_nv16qi (__a))
#define vdupq_n_s16(__a) \
  ((int16x8_t)__builtin_neon_vdup_nv8hi (__a))
#define vdupq_n_s32(__a) \
  ((int32x4_t)__builtin_neon_vdup_nv4si (__a))
#define vdupq_n_f32(__a) \
  ((float32x4_t)__builtin_neon_vdup_nv4sf (__a))
#define vdupq_n_u8(__a) \
  ((uint8x16_t)__builtin_neon_vdup_nv16qi (__a))
#define vdupq_n_u16(__a) \
  ((uint16x8_t)__builtin_neon_vdup_nv8hi (__a))
#define vdupq_n_u32(__a) \
  ((uint32x4_t)__builtin_neon_vdup_nv4si (__a))
#define vdupq_n_p8(__a) \
  ((poly8x16_t)__builtin_neon_vdup_nv16qi (__a))
#define vdupq_n_p16(__a) \
  ((poly16x8_t)__builtin_neon_vdup_nv8hi (__a))
#define vdupq_n_s64(__a) \
  ((int64x2_t)__builtin_neon_vdup_nv2di (__a))
#define vdupq_n_u64(__a) \
  ((uint64x2_t)__builtin_neon_vdup_nv2di (__a))
/* vmov_n_*: expand to the same __builtin_neon_vdup_n<mode> calls as the
   vdup_n_* macros, cast to the vector type named by the macro.  The
   expansion is fully parenthesized so it is safe under sizeof and postfix
   operators.  */
#define vmov_n_s8(__a) \
  ((int8x8_t)__builtin_neon_vdup_nv8qi (__a))
#define vmov_n_s16(__a) \
  ((int16x4_t)__builtin_neon_vdup_nv4hi (__a))
#define vmov_n_s32(__a) \
  ((int32x2_t)__builtin_neon_vdup_nv2si (__a))
#define vmov_n_f32(__a) \
  ((float32x2_t)__builtin_neon_vdup_nv2sf (__a))
#define vmov_n_u8(__a) \
  ((uint8x8_t)__builtin_neon_vdup_nv8qi (__a))
#define vmov_n_u16(__a) \
  ((uint16x4_t)__builtin_neon_vdup_nv4hi (__a))
#define vmov_n_u32(__a) \
  ((uint32x2_t)__builtin_neon_vdup_nv2si (__a))
#define vmov_n_p8(__a) \
  ((poly8x8_t)__builtin_neon_vdup_nv8qi (__a))
#define vmov_n_p16(__a) \
  ((poly16x4_t)__builtin_neon_vdup_nv4hi (__a))
#define vmov_n_s64(__a) \
  ((int64x1_t)__builtin_neon_vdup_nv1di (__a))
#define vmov_n_u64(__a) \
  ((uint64x1_t)__builtin_neon_vdup_nv1di (__a))
/* vmovq_n_*: expand to the same __builtin_neon_vdup_n<mode> calls as the
   vdupq_n_* macros, cast to the vector type named by the macro.  The
   expansion is fully parenthesized so it is safe under sizeof and postfix
   operators.  */
#define vmovq_n_s8(__a) \
  ((int8x16_t)__builtin_neon_vdup_nv16qi (__a))
#define vmovq_n_s16(__a) \
  ((int16x8_t)__builtin_neon_vdup_nv8hi (__a))
#define vmovq_n_s32(__a) \
  ((int32x4_t)__builtin_neon_vdup_nv4si (__a))
#define vmovq_n_f32(__a) \
  ((float32x4_t)__builtin_neon_vdup_nv4sf (__a))
#define vmovq_n_u8(__a) \
  ((uint8x16_t)__builtin_neon_vdup_nv16qi (__a))
#define vmovq_n_u16(__a) \
  ((uint16x8_t)__builtin_neon_vdup_nv8hi (__a))
#define vmovq_n_u32(__a) \
  ((uint32x4_t)__builtin_neon_vdup_nv4si (__a))
#define vmovq_n_p8(__a) \
  ((poly8x16_t)__builtin_neon_vdup_nv16qi (__a))
#define vmovq_n_p16(__a) \
  ((poly16x8_t)__builtin_neon_vdup_nv8hi (__a))
#define vmovq_n_s64(__a) \
  ((int64x2_t)__builtin_neon_vdup_nv2di (__a))
#define vmovq_n_u64(__a) \
  ((uint64x2_t)__builtin_neon_vdup_nv2di (__a))
/* vdup_lane_*: pass vector __a and lane index __b to
   __builtin_neon_vdup_lane<mode> and cast the result to the vector type
   named by the macro.  The expansion is fully parenthesized so it is safe
   under sizeof and postfix operators.  */
#define vdup_lane_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b))
#define vdup_lane_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b))
#define vdup_lane_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vdup_lanev2si (__a, __b))
#define vdup_lane_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vdup_lanev2sf (__a, __b))
#define vdup_lane_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b))
#define vdup_lane_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b))
#define vdup_lane_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vdup_lanev2si (__a, __b))
#define vdup_lane_p8(__a, __b) \
  ((poly8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b))
#define vdup_lane_p16(__a, __b) \
  ((poly16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b))
#define vdup_lane_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vdup_lanev1di (__a, __b))
#define vdup_lane_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vdup_lanev1di (__a, __b))
/* vdupq_lane_*: pass vector __a and lane index __b to
   __builtin_neon_vdup_lane<mode> and cast the result to the vector type
   named by the macro.  The expansion is fully parenthesized so it is safe
   under sizeof and postfix operators.  */
#define vdupq_lane_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b))
#define vdupq_lane_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b))
#define vdupq_lane_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vdup_lanev4si (__a, __b))
#define vdupq_lane_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vdup_lanev4sf (__a, __b))
#define vdupq_lane_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b))
#define vdupq_lane_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b))
#define vdupq_lane_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vdup_lanev4si (__a, __b))
#define vdupq_lane_p8(__a, __b) \
  ((poly8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b))
#define vdupq_lane_p16(__a, __b) \
  ((poly16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b))
#define vdupq_lane_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vdup_lanev2di (__a, __b))
#define vdupq_lane_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vdup_lanev2di (__a, __b))
/* vcombine_*: pass (__a, __b) to __builtin_neon_vcombine<mode> and cast the
   result to the vector type named by the macro.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vcombine_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vcombinev8qi (__a, __b))
#define vcombine_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vcombinev4hi (__a, __b))
#define vcombine_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vcombinev2si (__a, __b))
#define vcombine_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vcombinev1di (__a, __b))
#define vcombine_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vcombinev2sf (__a, __b))
#define vcombine_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vcombinev8qi (__a, __b))
#define vcombine_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vcombinev4hi (__a, __b))
#define vcombine_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcombinev2si (__a, __b))
#define vcombine_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vcombinev1di (__a, __b))
#define vcombine_p8(__a, __b) \
  ((poly8x16_t)__builtin_neon_vcombinev8qi (__a, __b))
#define vcombine_p16(__a, __b) \
  ((poly16x8_t)__builtin_neon_vcombinev4hi (__a, __b))
/* vget_high_*: pass __a to __builtin_neon_vget_high<mode> and cast the
   result to the vector type named by the macro.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vget_high_s8(__a) \
  ((int8x8_t)__builtin_neon_vget_highv16qi (__a))
#define vget_high_s16(__a) \
  ((int16x4_t)__builtin_neon_vget_highv8hi (__a))
#define vget_high_s32(__a) \
  ((int32x2_t)__builtin_neon_vget_highv4si (__a))
#define vget_high_s64(__a) \
  ((int64x1_t)__builtin_neon_vget_highv2di (__a))
#define vget_high_f32(__a) \
  ((float32x2_t)__builtin_neon_vget_highv4sf (__a))
#define vget_high_u8(__a) \
  ((uint8x8_t)__builtin_neon_vget_highv16qi (__a))
#define vget_high_u16(__a) \
  ((uint16x4_t)__builtin_neon_vget_highv8hi (__a))
#define vget_high_u32(__a) \
  ((uint32x2_t)__builtin_neon_vget_highv4si (__a))
#define vget_high_u64(__a) \
  ((uint64x1_t)__builtin_neon_vget_highv2di (__a))
#define vget_high_p8(__a) \
  ((poly8x8_t)__builtin_neon_vget_highv16qi (__a))
#define vget_high_p16(__a) \
  ((poly16x4_t)__builtin_neon_vget_highv8hi (__a))
/* vget_low_*: pass __a to __builtin_neon_vget_low<mode> and cast the result
   to the vector type named by the macro.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vget_low_s8(__a) \
  ((int8x8_t)__builtin_neon_vget_lowv16qi (__a))
#define vget_low_s16(__a) \
  ((int16x4_t)__builtin_neon_vget_lowv8hi (__a))
#define vget_low_s32(__a) \
  ((int32x2_t)__builtin_neon_vget_lowv4si (__a))
#define vget_low_s64(__a) \
  ((int64x1_t)__builtin_neon_vget_lowv2di (__a))
#define vget_low_f32(__a) \
  ((float32x2_t)__builtin_neon_vget_lowv4sf (__a))
#define vget_low_u8(__a) \
  ((uint8x8_t)__builtin_neon_vget_lowv16qi (__a))
#define vget_low_u16(__a) \
  ((uint16x4_t)__builtin_neon_vget_lowv8hi (__a))
#define vget_low_u32(__a) \
  ((uint32x2_t)__builtin_neon_vget_lowv4si (__a))
#define vget_low_u64(__a) \
  ((uint64x1_t)__builtin_neon_vget_lowv2di (__a))
#define vget_low_p8(__a) \
  ((poly8x8_t)__builtin_neon_vget_lowv16qi (__a))
#define vget_low_p16(__a) \
  ((poly16x4_t)__builtin_neon_vget_lowv8hi (__a))
/* vcvt_* / vcvtq_*: pass __a to __builtin_neon_vcvt<mode>, where <mode> is
   taken from the source element type (v2sf/v4sf for f32 input, v2si/v4si for
   integer input).  The trailing constant is 1 for the s32 forms and 0 for
   the u32 forms.  The expansion is fully parenthesized so it is safe under
   sizeof and postfix operators.  */
#define vcvt_s32_f32(__a) \
  ((int32x2_t)__builtin_neon_vcvtv2sf (__a, 1))
#define vcvt_f32_s32(__a) \
  ((float32x2_t)__builtin_neon_vcvtv2si (__a, 1))
#define vcvt_f32_u32(__a) \
  ((float32x2_t)__builtin_neon_vcvtv2si (__a, 0))
#define vcvt_u32_f32(__a) \
  ((uint32x2_t)__builtin_neon_vcvtv2sf (__a, 0))
#define vcvtq_s32_f32(__a) \
  ((int32x4_t)__builtin_neon_vcvtv4sf (__a, 1))
#define vcvtq_f32_s32(__a) \
  ((float32x4_t)__builtin_neon_vcvtv4si (__a, 1))
#define vcvtq_f32_u32(__a) \
  ((float32x4_t)__builtin_neon_vcvtv4si (__a, 0))
#define vcvtq_u32_f32(__a) \
  ((uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0))
/* vcvt_n_* / vcvtq_n_*: pass (__a, __b) to __builtin_neon_vcvt_n<mode>,
   where <mode> is taken from the source element type.  The trailing
   constant is 1 for the s32 forms and 0 for the u32 forms.  The expansion is
   fully parenthesized so it is safe under sizeof and postfix operators.  */
#define vcvt_n_s32_f32(__a, __b) \
  ((int32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 1))
#define vcvt_n_f32_s32(__a, __b) \
  ((float32x2_t)__builtin_neon_vcvt_nv2si (__a, __b, 1))
#define vcvt_n_f32_u32(__a, __b) \
  ((float32x2_t)__builtin_neon_vcvt_nv2si (__a, __b, 0))
#define vcvt_n_u32_f32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 0))
#define vcvtq_n_s32_f32(__a, __b) \
  ((int32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 1))
#define vcvtq_n_f32_s32(__a, __b) \
  ((float32x4_t)__builtin_neon_vcvt_nv4si (__a, __b, 1))
#define vcvtq_n_f32_u32(__a, __b) \
  ((float32x4_t)__builtin_neon_vcvt_nv4si (__a, __b, 0))
#define vcvtq_n_u32_f32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 0))
/* vmovn_*: pass __a to __builtin_neon_vmovn<mode> (mode of the source
   vector) and cast to the result type named in each definition.  Signed
   variants pass 1 as the trailing constant, unsigned variants pass 0.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vmovn_s16(__a) \
  ((int8x8_t)__builtin_neon_vmovnv8hi (__a, 1))
#define vmovn_s32(__a) \
  ((int16x4_t)__builtin_neon_vmovnv4si (__a, 1))
#define vmovn_s64(__a) \
  ((int32x2_t)__builtin_neon_vmovnv2di (__a, 1))
#define vmovn_u16(__a) \
  ((uint8x8_t)__builtin_neon_vmovnv8hi (__a, 0))
#define vmovn_u32(__a) \
  ((uint16x4_t)__builtin_neon_vmovnv4si (__a, 0))
#define vmovn_u64(__a) \
  ((uint32x2_t)__builtin_neon_vmovnv2di (__a, 0))
/* vqmovn_*: pass __a to __builtin_neon_vqmovn<mode> (mode of the source
   vector) and cast to the result type named in each definition.  Signed
   variants pass 1 as the trailing constant, unsigned variants pass 0.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vqmovn_s16(__a) \
  ((int8x8_t)__builtin_neon_vqmovnv8hi (__a, 1))
#define vqmovn_s32(__a) \
  ((int16x4_t)__builtin_neon_vqmovnv4si (__a, 1))
#define vqmovn_s64(__a) \
  ((int32x2_t)__builtin_neon_vqmovnv2di (__a, 1))
#define vqmovn_u16(__a) \
  ((uint8x8_t)__builtin_neon_vqmovnv8hi (__a, 0))
#define vqmovn_u32(__a) \
  ((uint16x4_t)__builtin_neon_vqmovnv4si (__a, 0))
#define vqmovn_u64(__a) \
  ((uint32x2_t)__builtin_neon_vqmovnv2di (__a, 0))
/* vqmovun_*: pass __a to __builtin_neon_vqmovun<mode> with trailing constant
   1 and cast to an unsigned vector type.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vqmovun_s16(__a) \
  ((uint8x8_t)__builtin_neon_vqmovunv8hi (__a, 1))
#define vqmovun_s32(__a) \
  ((uint16x4_t)__builtin_neon_vqmovunv4si (__a, 1))
#define vqmovun_s64(__a) \
  ((uint32x2_t)__builtin_neon_vqmovunv2di (__a, 1))
/* vmovl_*: pass __a to __builtin_neon_vmovl<mode> (mode of the source
   vector) and cast to the result type named in each definition.  Signed
   variants pass 1 as the trailing constant, unsigned variants pass 0.
   The expansion is fully parenthesized so it is safe under sizeof and
   postfix operators.  */
#define vmovl_s8(__a) \
  ((int16x8_t)__builtin_neon_vmovlv8qi (__a, 1))
#define vmovl_s16(__a) \
  ((int32x4_t)__builtin_neon_vmovlv4hi (__a, 1))
#define vmovl_s32(__a) \
  ((int64x2_t)__builtin_neon_vmovlv2si (__a, 1))
#define vmovl_u8(__a) \
  ((uint16x8_t)__builtin_neon_vmovlv8qi (__a, 0))
#define vmovl_u16(__a) \
  ((uint32x4_t)__builtin_neon_vmovlv4hi (__a, 0))
#define vmovl_u32(__a) \
  ((uint64x2_t)__builtin_neon_vmovlv2si (__a, 0))
/* vtbl1_*: pass (__a, __b) to __builtin_neon_vtbl1v8qi and cast the result
   to the vector type named by the macro.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vtbl1_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vtbl1v8qi (__a, __b))
#define vtbl1_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vtbl1v8qi (__a, __b))
#define vtbl1_p8(__a, __b) \
  ((poly8x8_t)__builtin_neon_vtbl1v8qi (__a, __b))
/* vtbl2_*: reinterpret the two-vector aggregate __a as the builtin's
   __builtin_neon_v8qi2 operand through a union, then call
   __builtin_neon_vtbl2v8qi with it and __b.  The body needs a local
   declaration, so it is wrapped in a GNU statement expression whose last
   expression is the macro's value.  Each definition ends without a trailing
   line continuation so the following #define is not swallowed.  */
#define vtbl2_s8(__a, __b) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __au = { __a }; \
     (int8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); \
   })
#define vtbl2_u8(__a, __b) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __au = { __a }; \
     (uint8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); \
   })
#define vtbl2_p8(__a, __b) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __au = { __a }; \
     (poly8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); \
   })
/* vtbl3_*: reinterpret the three-vector aggregate __a as the builtin's
   __builtin_neon_v8qi3 operand through a union, then call
   __builtin_neon_vtbl3v8qi with it and __b.  The body needs a local
   declaration, so it is wrapped in a GNU statement expression whose last
   expression is the macro's value.  Each definition ends without a trailing
   line continuation so the following #define is not swallowed.  */
#define vtbl3_s8(__a, __b) \
  ({ \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __au = { __a }; \
     (int8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); \
   })
#define vtbl3_u8(__a, __b) \
  ({ \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __au = { __a }; \
     (uint8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); \
   })
#define vtbl3_p8(__a, __b) \
  ({ \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __au = { __a }; \
     (poly8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); \
   })
/* vtbl4_*: reinterpret the four-vector aggregate __a as the builtin's
   __builtin_neon_v8qi4 operand through a union, then call
   __builtin_neon_vtbl4v8qi with it and __b.  The body needs a local
   declaration, so it is wrapped in a GNU statement expression whose last
   expression is the macro's value.  Each definition ends without a trailing
   line continuation so the following #define is not swallowed.  */
#define vtbl4_s8(__a, __b) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __au = { __a }; \
     (int8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); \
   })
#define vtbl4_u8(__a, __b) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __au = { __a }; \
     (uint8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); \
   })
#define vtbl4_p8(__a, __b) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __au = { __a }; \
     (poly8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); \
   })
/* vtbx1_*: pass (__a, __b, __c) to __builtin_neon_vtbx1v8qi and cast the
   result to the vector type named by the macro.  The expansion is fully
   parenthesized so it is safe under sizeof and postfix operators.  */
#define vtbx1_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c))
#define vtbx1_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c))
#define vtbx1_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c))
/* vtbx2_*: reinterpret the two-vector aggregate __b as the builtin's
   __builtin_neon_v8qi2 operand through a union, then call
   __builtin_neon_vtbx2v8qi (__a, <table>, __c).  The body needs a local
   declaration, so it is wrapped in a GNU statement expression whose last
   expression is the macro's value.  Each definition ends without a trailing
   line continuation so the following #define is not swallowed.  */
#define vtbx2_s8(__a, __b, __c) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     (int8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c); \
   })
#define vtbx2_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     (uint8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c); \
   })
#define vtbx2_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
     (poly8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c); \
   })
/* vtbx3_*: reinterpret the three-vector aggregate __b as the builtin's
   __builtin_neon_v8qi3 operand through a union, then call
   __builtin_neon_vtbx3v8qi (__a, <table>, __c).  The body needs a local
   declaration, so it is wrapped in a GNU statement expression whose last
   expression is the macro's value.  Each definition ends without a trailing
   line continuation so the following #define is not swallowed.  */
#define vtbx3_s8(__a, __b, __c) \
  ({ \
     union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     (int8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); \
   })
#define vtbx3_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     (uint8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); \
   })
#define vtbx3_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
     (poly8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); \
   })
/* vtbx4_*: reinterpret the four-vector aggregate __b as the builtin's
   __builtin_neon_v8qi4 operand through a union, then call
   __builtin_neon_vtbx4v8qi (__a, <table>, __c).  The body needs a local
   declaration, so it is wrapped in a GNU statement expression whose last
   expression is the macro's value.  The final definition ends without a
   trailing line continuation so the following #define is not swallowed.  */
#define vtbx4_s8(__a, __b, __c) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     (int8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); \
   })
#define vtbx4_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     (uint8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); \
   })
#define vtbx4_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     (poly8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); \
   })
/* vmul_lane_*: pass (__a, __b, __c) to __builtin_neon_vmul_lane<mode> and
   cast the result to the vector type named by the macro.  The trailing
   constant is 1 for signed, 0 for unsigned and 5 for f32 variants.  The
   expansion is fully parenthesized so it is safe under sizeof and postfix
   operators.  */
#define vmul_lane_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c, 1))
#define vmul_lane_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c, 1))
#define vmul_lane_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmul_lanev2sf (__a, __b, __c, 5))
#define vmul_lane_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c, 0))
#define vmul_lane_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c, 0))
/* vmulq_lane_*: pass (__a, __b, __c) to __builtin_neon_vmul_lane<mode> and
   cast the result to the vector type named by the macro.  The trailing
   constant is 1 for signed, 0 for unsigned and 5 for f32 variants.  The
   expansion is fully parenthesized so it is safe under sizeof and postfix
   operators.  */
#define vmulq_lane_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c, 1))
#define vmulq_lane_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c, 1))
#define vmulq_lane_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmul_lanev4sf (__a, __b, __c, 5))
#define vmulq_lane_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c, 0))
#define vmulq_lane_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c, 0))
/* vmla_lane / vmlaq_lane: forward the four arguments to
   __builtin_neon_vmla_lane<mode> and cast the result to the public vector
   type.  The trailing constant is 1 for signed, 0 for unsigned and 5 for
   float32 variants.  Each expansion is parenthesized so it stays a single
   operand under sizeof or postfix operators at the call site.  */
#define vmla_lane_s16(__a, __b, __c, __d) \
  ((int16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d, 1))

#define vmla_lane_s32(__a, __b, __c, __d) \
  ((int32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d, 1))

#define vmla_lane_f32(__a, __b, __c, __d) \
  ((float32x2_t)__builtin_neon_vmla_lanev2sf (__a, __b, __c, __d, 5))

#define vmla_lane_u16(__a, __b, __c, __d) \
  ((uint16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d, 0))

#define vmla_lane_u32(__a, __b, __c, __d) \
  ((uint32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d, 0))

#define vmlaq_lane_s16(__a, __b, __c, __d) \
  ((int16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d, 1))

#define vmlaq_lane_s32(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d, 1))

#define vmlaq_lane_f32(__a, __b, __c, __d) \
  ((float32x4_t)__builtin_neon_vmla_lanev4sf (__a, __b, __c, __d, 5))

#define vmlaq_lane_u16(__a, __b, __c, __d) \
  ((uint16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d, 0))

#define vmlaq_lane_u32(__a, __b, __c, __d) \
  ((uint32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d, 0))
/* vmlal_lane / vqdmlal_lane: forward the four arguments to
   __builtin_neon_vmlal_lane<mode> or __builtin_neon_vqdmlal_lane<mode>
   and cast the result to the public vector type.  The trailing constant
   is 1 for signed and 0 for unsigned variants.  Each expansion is
   parenthesized so it stays a single operand at the call site.  */
#define vmlal_lane_s16(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vmlal_lanev4hi (__a, __b, __c, __d, 1))

#define vmlal_lane_s32(__a, __b, __c, __d) \
  ((int64x2_t)__builtin_neon_vmlal_lanev2si (__a, __b, __c, __d, 1))

#define vmlal_lane_u16(__a, __b, __c, __d) \
  ((uint32x4_t)__builtin_neon_vmlal_lanev4hi (__a, __b, __c, __d, 0))

#define vmlal_lane_u32(__a, __b, __c, __d) \
  ((uint64x2_t)__builtin_neon_vmlal_lanev2si (__a, __b, __c, __d, 0))

#define vqdmlal_lane_s16(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vqdmlal_lanev4hi (__a, __b, __c, __d, 1))

#define vqdmlal_lane_s32(__a, __b, __c, __d) \
  ((int64x2_t)__builtin_neon_vqdmlal_lanev2si (__a, __b, __c, __d, 1))
/* vmls_lane / vmlsq_lane: forward the four arguments to
   __builtin_neon_vmls_lane<mode> and cast the result to the public vector
   type.  The trailing constant is 1 for signed, 0 for unsigned and 5 for
   float32 variants.  Each expansion is parenthesized so it stays a single
   operand under sizeof or postfix operators at the call site.  */
#define vmls_lane_s16(__a, __b, __c, __d) \
  ((int16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d, 1))

#define vmls_lane_s32(__a, __b, __c, __d) \
  ((int32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d, 1))

#define vmls_lane_f32(__a, __b, __c, __d) \
  ((float32x2_t)__builtin_neon_vmls_lanev2sf (__a, __b, __c, __d, 5))

#define vmls_lane_u16(__a, __b, __c, __d) \
  ((uint16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d, 0))

#define vmls_lane_u32(__a, __b, __c, __d) \
  ((uint32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d, 0))

#define vmlsq_lane_s16(__a, __b, __c, __d) \
  ((int16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d, 1))

#define vmlsq_lane_s32(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d, 1))

#define vmlsq_lane_f32(__a, __b, __c, __d) \
  ((float32x4_t)__builtin_neon_vmls_lanev4sf (__a, __b, __c, __d, 5))

#define vmlsq_lane_u16(__a, __b, __c, __d) \
  ((uint16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d, 0))

#define vmlsq_lane_u32(__a, __b, __c, __d) \
  ((uint32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d, 0))
/* vmlsl_lane / vqdmlsl_lane: forward the four arguments to
   __builtin_neon_vmlsl_lane<mode> or __builtin_neon_vqdmlsl_lane<mode>
   and cast the result to the public vector type.  The trailing constant
   is 1 for signed and 0 for unsigned variants.  Each expansion is
   parenthesized so it stays a single operand at the call site.  */
#define vmlsl_lane_s16(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vmlsl_lanev4hi (__a, __b, __c, __d, 1))

#define vmlsl_lane_s32(__a, __b, __c, __d) \
  ((int64x2_t)__builtin_neon_vmlsl_lanev2si (__a, __b, __c, __d, 1))

#define vmlsl_lane_u16(__a, __b, __c, __d) \
  ((uint32x4_t)__builtin_neon_vmlsl_lanev4hi (__a, __b, __c, __d, 0))

#define vmlsl_lane_u32(__a, __b, __c, __d) \
  ((uint64x2_t)__builtin_neon_vmlsl_lanev2si (__a, __b, __c, __d, 0))

#define vqdmlsl_lane_s16(__a, __b, __c, __d) \
  ((int32x4_t)__builtin_neon_vqdmlsl_lanev4hi (__a, __b, __c, __d, 1))

#define vqdmlsl_lane_s32(__a, __b, __c, __d) \
  ((int64x2_t)__builtin_neon_vqdmlsl_lanev2si (__a, __b, __c, __d, 1))
/* vmull_lane / vqdmull_lane: forward the three arguments to
   __builtin_neon_vmull_lane<mode> or __builtin_neon_vqdmull_lane<mode>
   and cast the result to the public vector type.  The trailing constant
   is 1 for signed and 0 for unsigned variants.  Each expansion is
   parenthesized so it stays a single operand at the call site.  */
#define vmull_lane_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmull_lanev4hi (__a, __b, __c, 1))

#define vmull_lane_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmull_lanev2si (__a, __b, __c, 1))

#define vmull_lane_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmull_lanev4hi (__a, __b, __c, 0))

#define vmull_lane_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmull_lanev2si (__a, __b, __c, 0))

#define vqdmull_lane_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmull_lanev4hi (__a, __b, __c, 1))

#define vqdmull_lane_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmull_lanev2si (__a, __b, __c, 1))
/* vqdmulh_lane / vqrdmulh_lane (64- and 128-bit forms).  Both families
   expand to the same __builtin_neon_vqdmulh_lane<mode> builtin; the
   vqdmulh variants pass 1 as the trailing constant and the vqrdmulh
   variants pass 3 (presumably selecting the rounding form -- confirm
   against the builtin's definition).  Each expansion is parenthesized so
   it stays a single operand at the call site.  */
#define vqdmulhq_lane_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 1))

#define vqdmulhq_lane_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 1))

#define vqdmulh_lane_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 1))

#define vqdmulh_lane_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 1))

#define vqrdmulhq_lane_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 3))

#define vqrdmulhq_lane_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 3))

#define vqrdmulh_lane_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 3))

#define vqrdmulh_lane_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 3))
/* vmul_n / vmulq_n: forward both arguments to __builtin_neon_vmul_n<mode>
   and cast the result to the public vector type.  The trailing constant
   is 1 for signed, 0 for unsigned and 5 for float32 variants.  Each
   expansion is parenthesized so it stays a single operand under sizeof or
   postfix operators at the call site.  */
#define vmul_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vmul_nv4hi (__a, __b, 1))

#define vmul_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vmul_nv2si (__a, __b, 1))

#define vmul_n_f32(__a, __b) \
  ((float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 5))

#define vmul_n_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vmul_nv4hi (__a, __b, 0))

#define vmul_n_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vmul_nv2si (__a, __b, 0))

#define vmulq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vmul_nv8hi (__a, __b, 1))

#define vmulq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vmul_nv4si (__a, __b, 1))

#define vmulq_n_f32(__a, __b) \
  ((float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 5))

#define vmulq_n_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vmul_nv8hi (__a, __b, 0))

#define vmulq_n_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmul_nv4si (__a, __b, 0))
/* vmull_n / vqdmull_n: forward both arguments to
   __builtin_neon_vmull_n<mode> or __builtin_neon_vqdmull_n<mode> and cast
   the result to the public vector type.  The trailing constant is 1 for
   signed and 0 for unsigned variants.  Each expansion is parenthesized so
   it stays a single operand at the call site.  */
#define vmull_n_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vmull_nv4hi (__a, __b, 1))

#define vmull_n_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vmull_nv2si (__a, __b, 1))

#define vmull_n_u16(__a, __b) \
  ((uint32x4_t)__builtin_neon_vmull_nv4hi (__a, __b, 0))

#define vmull_n_u32(__a, __b) \
  ((uint64x2_t)__builtin_neon_vmull_nv2si (__a, __b, 0))

#define vqdmull_n_s16(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmull_nv4hi (__a, __b, 1))

#define vqdmull_n_s32(__a, __b) \
  ((int64x2_t)__builtin_neon_vqdmull_nv2si (__a, __b, 1))
/* vqdmulh_n / vqrdmulh_n (64- and 128-bit forms).  Both families expand
   to the same __builtin_neon_vqdmulh_n<mode> builtin; the vqdmulh
   variants pass 1 as the trailing constant and the vqrdmulh variants pass
   3 (presumably selecting the rounding form -- confirm against the
   builtin's definition).  Each expansion is parenthesized so it stays a
   single operand at the call site.  */
#define vqdmulhq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, __b, 1))

#define vqdmulhq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, __b, 1))

#define vqdmulh_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, __b, 1))

#define vqdmulh_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, __b, 1))

#define vqrdmulhq_n_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, __b, 3))

#define vqrdmulhq_n_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, __b, 3))

#define vqrdmulh_n_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, __b, 3))

#define vqrdmulh_n_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, __b, 3))
/* vmla_n / vmlaq_n: forward the three arguments to
   __builtin_neon_vmla_n<mode> and cast the result to the public vector
   type.  The trailing constant is 1 for signed, 0 for unsigned and 5 for
   float32 variants.  Each expansion is parenthesized so it stays a single
   operand under sizeof or postfix operators at the call site.  */
#define vmla_n_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, __c, 1))

#define vmla_n_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmla_nv2si (__a, __b, __c, 1))

#define vmla_n_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 5))

#define vmla_n_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, __c, 0))

#define vmla_n_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmla_nv2si (__a, __b, __c, 0))

#define vmlaq_n_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, __c, 1))

#define vmlaq_n_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmla_nv4si (__a, __b, __c, 1))

#define vmlaq_n_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 5))

#define vmlaq_n_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, __c, 0))

#define vmlaq_n_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmla_nv4si (__a, __b, __c, 0))
/* vmlal_n / vqdmlal_n: forward the three arguments to
   __builtin_neon_vmlal_n<mode> or __builtin_neon_vqdmlal_n<mode> and cast
   the result to the public vector type.  The trailing constant is 1 for
   signed and 0 for unsigned variants.  Each expansion is parenthesized so
   it stays a single operand at the call site.  */
#define vmlal_n_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlal_nv4hi (__a, __b, __c, 1))

#define vmlal_n_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmlal_nv2si (__a, __b, __c, 1))

#define vmlal_n_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlal_nv4hi (__a, __b, __c, 0))

#define vmlal_n_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmlal_nv2si (__a, __b, __c, 0))

#define vqdmlal_n_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmlal_nv4hi (__a, __b, __c, 1))

#define vqdmlal_n_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmlal_nv2si (__a, __b, __c, 1))
/* vmls_n / vmlsq_n: forward the three arguments to
   __builtin_neon_vmls_n<mode> and cast the result to the public vector
   type.  The trailing constant is 1 for signed, 0 for unsigned and 5 for
   float32 variants.  Each expansion is parenthesized so it stays a single
   operand under sizeof or postfix operators at the call site.  */
#define vmls_n_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, __c, 1))

#define vmls_n_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vmls_nv2si (__a, __b, __c, 1))

#define vmls_n_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 5))

#define vmls_n_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, __c, 0))

#define vmls_n_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vmls_nv2si (__a, __b, __c, 0))

#define vmlsq_n_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, __c, 1))

#define vmlsq_n_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmls_nv4si (__a, __b, __c, 1))

#define vmlsq_n_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 5))

#define vmlsq_n_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, __c, 0))

#define vmlsq_n_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmls_nv4si (__a, __b, __c, 0))
/* vmlsl_n / vqdmlsl_n: forward the three arguments to
   __builtin_neon_vmlsl_n<mode> or __builtin_neon_vqdmlsl_n<mode> and cast
   the result to the public vector type.  The trailing constant is 1 for
   signed and 0 for unsigned variants.  Each expansion is parenthesized so
   it stays a single operand at the call site.  */
#define vmlsl_n_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vmlsl_nv4hi (__a, __b, __c, 1))

#define vmlsl_n_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vmlsl_nv2si (__a, __b, __c, 1))

#define vmlsl_n_u16(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vmlsl_nv4hi (__a, __b, __c, 0))

#define vmlsl_n_u32(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vmlsl_nv2si (__a, __b, __c, 0))

#define vqdmlsl_n_s16(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vqdmlsl_nv4hi (__a, __b, __c, 1))

#define vqdmlsl_n_s32(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, __c, 1))
/* vext (64-bit vectors): forward the three arguments unchanged to
   __builtin_neon_vext<mode> and cast the result to the public vector
   type.  Signed, unsigned and polynomial variants with the same lane
   layout share one builtin and differ only in the cast.  Each expansion
   is parenthesized so it stays a single operand at the call site.  */
#define vext_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vextv8qi (__a, __b, __c))

#define vext_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vextv4hi (__a, __b, __c))

#define vext_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vextv2si (__a, __b, __c))

#define vext_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vextv1di (__a, __b, __c))

#define vext_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vextv2sf (__a, __b, __c))

#define vext_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vextv8qi (__a, __b, __c))

#define vext_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vextv4hi (__a, __b, __c))

#define vext_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vextv2si (__a, __b, __c))

#define vext_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vextv1di (__a, __b, __c))

#define vext_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vextv8qi (__a, __b, __c))

#define vext_p16(__a, __b, __c) \
  ((poly16x4_t)__builtin_neon_vextv4hi (__a, __b, __c))
/* vextq (128-bit vectors): forward the three arguments unchanged to
   __builtin_neon_vext<mode> and cast the result to the public vector
   type.  Signed, unsigned and polynomial variants with the same lane
   layout share one builtin and differ only in the cast.  Each expansion
   is parenthesized so it stays a single operand at the call site.  */
#define vextq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vextv16qi (__a, __b, __c))

#define vextq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vextv8hi (__a, __b, __c))

#define vextq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vextv4si (__a, __b, __c))

#define vextq_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vextv2di (__a, __b, __c))

#define vextq_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vextv4sf (__a, __b, __c))

#define vextq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vextv16qi (__a, __b, __c))

#define vextq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vextv8hi (__a, __b, __c))

#define vextq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vextv4si (__a, __b, __c))

#define vextq_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vextv2di (__a, __b, __c))

#define vextq_p8(__a, __b, __c) \
  ((poly8x16_t)__builtin_neon_vextv16qi (__a, __b, __c))

#define vextq_p16(__a, __b, __c) \
  ((poly16x8_t)__builtin_neon_vextv8hi (__a, __b, __c))
/* vrev64 (64-bit vectors): forward __a to __builtin_neon_vrev64<mode> and
   cast the result to the public vector type.  The trailing constant is 1
   for signed, 0 for unsigned, 4 for polynomial and 5 for float32
   variants.  Each expansion is parenthesized so it stays a single operand
   at the call site.  */
#define vrev64_s8(__a) \
  ((int8x8_t)__builtin_neon_vrev64v8qi (__a, 1))

#define vrev64_s16(__a) \
  ((int16x4_t)__builtin_neon_vrev64v4hi (__a, 1))

#define vrev64_s32(__a) \
  ((int32x2_t)__builtin_neon_vrev64v2si (__a, 1))

#define vrev64_f32(__a) \
  ((float32x2_t)__builtin_neon_vrev64v2sf (__a, 5))

#define vrev64_u8(__a) \
  ((uint8x8_t)__builtin_neon_vrev64v8qi (__a, 0))

#define vrev64_u16(__a) \
  ((uint16x4_t)__builtin_neon_vrev64v4hi (__a, 0))

#define vrev64_u32(__a) \
  ((uint32x2_t)__builtin_neon_vrev64v2si (__a, 0))

#define vrev64_p8(__a) \
  ((poly8x8_t)__builtin_neon_vrev64v8qi (__a, 4))

#define vrev64_p16(__a) \
  ((poly16x4_t)__builtin_neon_vrev64v4hi (__a, 4))
/* vrev64q (128-bit vectors): forward __a to __builtin_neon_vrev64<mode>
   and cast the result to the public vector type.  The trailing constant
   is 1 for signed, 0 for unsigned, 4 for polynomial and 5 for float32
   variants.  Each expansion is parenthesized so it stays a single operand
   at the call site.  */
#define vrev64q_s8(__a) \
  ((int8x16_t)__builtin_neon_vrev64v16qi (__a, 1))

#define vrev64q_s16(__a) \
  ((int16x8_t)__builtin_neon_vrev64v8hi (__a, 1))

#define vrev64q_s32(__a) \
  ((int32x4_t)__builtin_neon_vrev64v4si (__a, 1))

#define vrev64q_f32(__a) \
  ((float32x4_t)__builtin_neon_vrev64v4sf (__a, 5))

#define vrev64q_u8(__a) \
  ((uint8x16_t)__builtin_neon_vrev64v16qi (__a, 0))

#define vrev64q_u16(__a) \
  ((uint16x8_t)__builtin_neon_vrev64v8hi (__a, 0))

#define vrev64q_u32(__a) \
  ((uint32x4_t)__builtin_neon_vrev64v4si (__a, 0))

#define vrev64q_p8(__a) \
  ((poly8x16_t)__builtin_neon_vrev64v16qi (__a, 4))

#define vrev64q_p16(__a) \
  ((poly16x8_t)__builtin_neon_vrev64v8hi (__a, 4))
/* vrev32 / vrev32q: forward __a to __builtin_neon_vrev32<mode> and cast
   the result to the public vector type.  The trailing constant is 1 for
   signed, 0 for unsigned and 4 for polynomial variants.  Each expansion
   is parenthesized so it stays a single operand at the call site.  */
#define vrev32_s8(__a) \
  ((int8x8_t)__builtin_neon_vrev32v8qi (__a, 1))

#define vrev32_s16(__a) \
  ((int16x4_t)__builtin_neon_vrev32v4hi (__a, 1))

#define vrev32_u8(__a) \
  ((uint8x8_t)__builtin_neon_vrev32v8qi (__a, 0))

#define vrev32_u16(__a) \
  ((uint16x4_t)__builtin_neon_vrev32v4hi (__a, 0))

#define vrev32_p8(__a) \
  ((poly8x8_t)__builtin_neon_vrev32v8qi (__a, 4))

#define vrev32_p16(__a) \
  ((poly16x4_t)__builtin_neon_vrev32v4hi (__a, 4))

#define vrev32q_s8(__a) \
  ((int8x16_t)__builtin_neon_vrev32v16qi (__a, 1))

#define vrev32q_s16(__a) \
  ((int16x8_t)__builtin_neon_vrev32v8hi (__a, 1))

#define vrev32q_u8(__a) \
  ((uint8x16_t)__builtin_neon_vrev32v16qi (__a, 0))

#define vrev32q_u16(__a) \
  ((uint16x8_t)__builtin_neon_vrev32v8hi (__a, 0))

#define vrev32q_p8(__a) \
  ((poly8x16_t)__builtin_neon_vrev32v16qi (__a, 4))

#define vrev32q_p16(__a) \
  ((poly16x8_t)__builtin_neon_vrev32v8hi (__a, 4))
/* vrev16 / vrev16q: forward __a to __builtin_neon_vrev16<mode> and cast
   the result to the public vector type.  The trailing constant is 1 for
   signed, 0 for unsigned and 4 for polynomial variants.  Each expansion
   is parenthesized so it stays a single operand at the call site.  */
#define vrev16_s8(__a) \
  ((int8x8_t)__builtin_neon_vrev16v8qi (__a, 1))

#define vrev16_u8(__a) \
  ((uint8x8_t)__builtin_neon_vrev16v8qi (__a, 0))

#define vrev16_p8(__a) \
  ((poly8x8_t)__builtin_neon_vrev16v8qi (__a, 4))

#define vrev16q_s8(__a) \
  ((int8x16_t)__builtin_neon_vrev16v16qi (__a, 1))

#define vrev16q_u8(__a) \
  ((uint8x16_t)__builtin_neon_vrev16v16qi (__a, 0))

#define vrev16q_p8(__a) \
  ((poly8x16_t)__builtin_neon_vrev16v16qi (__a, 4))
/* vbsl (64-bit vectors): forward the three arguments unchanged to
   __builtin_neon_vbsl<mode> and cast the result to the public vector
   type.  Variants with the same lane layout share one builtin and differ
   only in the cast.  Each expansion is parenthesized so it stays a single
   operand at the call site.  */
#define vbsl_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vbslv8qi (__a, __b, __c))

#define vbsl_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vbslv4hi (__a, __b, __c))

#define vbsl_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vbslv2si (__a, __b, __c))

#define vbsl_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vbslv1di (__a, __b, __c))

#define vbsl_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vbslv2sf (__a, __b, __c))

#define vbsl_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vbslv8qi (__a, __b, __c))

#define vbsl_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vbslv4hi (__a, __b, __c))

#define vbsl_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vbslv2si (__a, __b, __c))

#define vbsl_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vbslv1di (__a, __b, __c))

#define vbsl_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vbslv8qi (__a, __b, __c))

#define vbsl_p16(__a, __b, __c) \
  ((poly16x4_t)__builtin_neon_vbslv4hi (__a, __b, __c))
/* vbslq (128-bit vectors): forward the three arguments unchanged to
   __builtin_neon_vbsl<mode> and cast the result to the public vector
   type.  Variants with the same lane layout share one builtin and differ
   only in the cast.  Each expansion is parenthesized so it stays a single
   operand at the call site.  */
#define vbslq_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vbslv16qi (__a, __b, __c))

#define vbslq_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vbslv8hi (__a, __b, __c))

#define vbslq_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vbslv4si (__a, __b, __c))

#define vbslq_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vbslv2di (__a, __b, __c))

#define vbslq_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vbslv4sf (__a, __b, __c))

#define vbslq_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vbslv16qi (__a, __b, __c))

#define vbslq_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vbslv8hi (__a, __b, __c))

#define vbslq_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vbslv4si (__a, __b, __c))

#define vbslq_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vbslv2di (__a, __b, __c))

#define vbslq_p8(__a, __b, __c) \
  ((poly8x16_t)__builtin_neon_vbslv16qi (__a, __b, __c))

#define vbslq_p16(__a, __b, __c) \
  ((poly16x8_t)__builtin_neon_vbslv8hi (__a, __b, __c))
/* vtrn (64-bit vectors).  Each macro calls __builtin_neon_vtrn<mode>,
   stores the opaque aggregate it returns in the __o member of a local
   union and yields the __i member, i.e. the same bits viewed as the
   public two-vector struct type.  A GNU statement expression is used so
   the macro can declare that temporary and still produce a value.  */
#define vtrn_s8(__a, __b) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8qi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_s16(__a, __b) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4hi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_s32(__a, __b) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv2si (__a, __b); \
     __rv.__i; \
   })

#define vtrn_f32(__a, __b) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv2sf (__a, __b); \
     __rv.__i; \
   })

#define vtrn_u8(__a, __b) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8qi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_u16(__a, __b) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4hi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_u32(__a, __b) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv2si (__a, __b); \
     __rv.__i; \
   })

#define vtrn_p8(__a, __b) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8qi (__a, __b); \
     __rv.__i; \
   })

#define vtrn_p16(__a, __b) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4hi (__a, __b); \
     __rv.__i; \
   })
/* vtrnq (128-bit vectors).  Each macro calls __builtin_neon_vtrn<mode>,
   stores the opaque aggregate it returns in the __o member of a local
   union and yields the __i member, i.e. the same bits viewed as the
   public two-vector struct type.  A GNU statement expression is used so
   the macro can declare that temporary and still produce a value.  */
#define vtrnq_s8(__a, __b) \
  ({ \
     union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv16qi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_s16(__a, __b) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8hi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_s32(__a, __b) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4si (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_f32(__a, __b) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4sf (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_u8(__a, __b) \
  ({ \
     union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv16qi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_u16(__a, __b) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8hi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_u32(__a, __b) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv4si (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_p8(__a, __b) \
  ({ \
     union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv16qi (__a, __b); \
     __rv.__i; \
   })

#define vtrnq_p16(__a, __b) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vtrnv8hi (__a, __b); \
     __rv.__i; \
   })
/* vzip (64-bit vectors).  Each macro calls __builtin_neon_vzip<mode>,
   stores the opaque aggregate it returns in the __o member of a local
   union and yields the __i member, i.e. the same bits viewed as the
   public two-vector struct type.  A GNU statement expression is used so
   the macro can declare that temporary and still produce a value.  */
#define vzip_s8(__a, __b) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8qi (__a, __b); \
     __rv.__i; \
   })

#define vzip_s16(__a, __b) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4hi (__a, __b); \
     __rv.__i; \
   })

#define vzip_s32(__a, __b) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv2si (__a, __b); \
     __rv.__i; \
   })

#define vzip_f32(__a, __b) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv2sf (__a, __b); \
     __rv.__i; \
   })

#define vzip_u8(__a, __b) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8qi (__a, __b); \
     __rv.__i; \
   })

#define vzip_u16(__a, __b) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4hi (__a, __b); \
     __rv.__i; \
   })

#define vzip_u32(__a, __b) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv2si (__a, __b); \
     __rv.__i; \
   })

#define vzip_p8(__a, __b) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8qi (__a, __b); \
     __rv.__i; \
   })

#define vzip_p16(__a, __b) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4hi (__a, __b); \
     __rv.__i; \
   })
/* vzipq (128-bit vectors).  Each macro calls __builtin_neon_vzip<mode>,
   stores the opaque aggregate it returns in the __o member of a local
   union and yields the __i member, i.e. the same bits viewed as the
   public two-vector struct type.  A GNU statement expression is used so
   the macro can declare that temporary and still produce a value.  */
#define vzipq_s8(__a, __b) \
  ({ \
     union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv16qi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_s16(__a, __b) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8hi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_s32(__a, __b) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4si (__a, __b); \
     __rv.__i; \
   })

#define vzipq_f32(__a, __b) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4sf (__a, __b); \
     __rv.__i; \
   })

#define vzipq_u8(__a, __b) \
  ({ \
     union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv16qi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_u16(__a, __b) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8hi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_u32(__a, __b) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv4si (__a, __b); \
     __rv.__i; \
   })

#define vzipq_p8(__a, __b) \
  ({ \
     union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv16qi (__a, __b); \
     __rv.__i; \
   })

#define vzipq_p16(__a, __b) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vzipv8hi (__a, __b); \
     __rv.__i; \
   })
/* vuzp (64-bit vectors).  Each macro calls __builtin_neon_vuzp<mode>,
   stores the opaque aggregate it returns in the __o member of a local
   union and yields the __i member, i.e. the same bits viewed as the
   public two-vector struct type.  A GNU statement expression is used so
   the macro can declare that temporary and still produce a value.  */
#define vuzp_s8(__a, __b) \
  ({ \
     union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8qi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_s16(__a, __b) \
  ({ \
     union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4hi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_s32(__a, __b) \
  ({ \
     union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv2si (__a, __b); \
     __rv.__i; \
   })

#define vuzp_f32(__a, __b) \
  ({ \
     union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv2sf (__a, __b); \
     __rv.__i; \
   })

#define vuzp_u8(__a, __b) \
  ({ \
     union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8qi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_u16(__a, __b) \
  ({ \
     union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4hi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_u32(__a, __b) \
  ({ \
     union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv2si (__a, __b); \
     __rv.__i; \
   })

#define vuzp_p8(__a, __b) \
  ({ \
     union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8qi (__a, __b); \
     __rv.__i; \
   })

#define vuzp_p16(__a, __b) \
  ({ \
     union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4hi (__a, __b); \
     __rv.__i; \
   })
/* vuzpq (128-bit vectors).  Each macro calls __builtin_neon_vuzp<mode>,
   stores the opaque aggregate it returns in the __o member of a local
   union and yields the __i member, i.e. the same bits viewed as the
   public two-vector struct type.  A GNU statement expression is used so
   the macro can declare that temporary and still produce a value.  */
#define vuzpq_s8(__a, __b) \
  ({ \
     union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv16qi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_s16(__a, __b) \
  ({ \
     union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8hi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_s32(__a, __b) \
  ({ \
     union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4si (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_f32(__a, __b) \
  ({ \
     union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4sf (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_u8(__a, __b) \
  ({ \
     union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv16qi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_u16(__a, __b) \
  ({ \
     union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8hi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_u32(__a, __b) \
  ({ \
     union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv4si (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_p8(__a, __b) \
  ({ \
     union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv16qi (__a, __b); \
     __rv.__i; \
   })

#define vuzpq_p16(__a, __b) \
  ({ \
     union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
     __rv.__o = __builtin_neon_vuzpv8hi (__a, __b); \
     __rv.__i; \
   })
/* vld1 (64-bit vectors): pass __a unchanged to __builtin_neon_vld1<mode>
   and cast the result to the public vector type.  Variants with the same
   lane layout share one builtin and differ only in the cast.  Each
   expansion is parenthesized so it stays a single operand under sizeof
   or postfix operators at the call site.  */
#define vld1_s8(__a) \
  ((int8x8_t)__builtin_neon_vld1v8qi (__a))

#define vld1_s16(__a) \
  ((int16x4_t)__builtin_neon_vld1v4hi (__a))

#define vld1_s32(__a) \
  ((int32x2_t)__builtin_neon_vld1v2si (__a))

#define vld1_s64(__a) \
  ((int64x1_t)__builtin_neon_vld1v1di (__a))

#define vld1_f32(__a) \
  ((float32x2_t)__builtin_neon_vld1v2sf (__a))

#define vld1_u8(__a) \
  ((uint8x8_t)__builtin_neon_vld1v8qi (__a))

#define vld1_u16(__a) \
  ((uint16x4_t)__builtin_neon_vld1v4hi (__a))

#define vld1_u32(__a) \
  ((uint32x2_t)__builtin_neon_vld1v2si (__a))

#define vld1_u64(__a) \
  ((uint64x1_t)__builtin_neon_vld1v1di (__a))

#define vld1_p8(__a) \
  ((poly8x8_t)__builtin_neon_vld1v8qi (__a))

#define vld1_p16(__a) \
  ((poly16x4_t)__builtin_neon_vld1v4hi (__a))
/* vld1q (128-bit vectors): pass __a unchanged to
   __builtin_neon_vld1<mode> and cast the result to the public vector
   type.  Variants with the same lane layout share one builtin and differ
   only in the cast.  Each expansion is parenthesized so it stays a single
   operand under sizeof or postfix operators at the call site.  */
#define vld1q_s8(__a) \
  ((int8x16_t)__builtin_neon_vld1v16qi (__a))

#define vld1q_s16(__a) \
  ((int16x8_t)__builtin_neon_vld1v8hi (__a))

#define vld1q_s32(__a) \
  ((int32x4_t)__builtin_neon_vld1v4si (__a))

#define vld1q_s64(__a) \
  ((int64x2_t)__builtin_neon_vld1v2di (__a))

#define vld1q_f32(__a) \
  ((float32x4_t)__builtin_neon_vld1v4sf (__a))

#define vld1q_u8(__a) \
  ((uint8x16_t)__builtin_neon_vld1v16qi (__a))

#define vld1q_u16(__a) \
  ((uint16x8_t)__builtin_neon_vld1v8hi (__a))

#define vld1q_u32(__a) \
  ((uint32x4_t)__builtin_neon_vld1v4si (__a))

#define vld1q_u64(__a) \
  ((uint64x2_t)__builtin_neon_vld1v2di (__a))

#define vld1q_p8(__a) \
  ((poly8x16_t)__builtin_neon_vld1v16qi (__a))

#define vld1q_p16(__a) \
  ((poly16x8_t)__builtin_neon_vld1v8hi (__a))
/* vld1_lane (64-bit vectors): forward the three arguments unchanged to
   __builtin_neon_vld1_lane<mode> and cast the result to the public vector
   type.  Variants with the same lane layout share one builtin and differ
   only in the cast.  Each expansion is parenthesized so it stays a single
   operand at the call site.  */
#define vld1_lane_s8(__a, __b, __c) \
  ((int8x8_t)__builtin_neon_vld1_lanev8qi (__a, __b, __c))

#define vld1_lane_s16(__a, __b, __c) \
  ((int16x4_t)__builtin_neon_vld1_lanev4hi (__a, __b, __c))

#define vld1_lane_s32(__a, __b, __c) \
  ((int32x2_t)__builtin_neon_vld1_lanev2si (__a, __b, __c))

#define vld1_lane_f32(__a, __b, __c) \
  ((float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c))

#define vld1_lane_u8(__a, __b, __c) \
  ((uint8x8_t)__builtin_neon_vld1_lanev8qi (__a, __b, __c))

#define vld1_lane_u16(__a, __b, __c) \
  ((uint16x4_t)__builtin_neon_vld1_lanev4hi (__a, __b, __c))

#define vld1_lane_u32(__a, __b, __c) \
  ((uint32x2_t)__builtin_neon_vld1_lanev2si (__a, __b, __c))

#define vld1_lane_p8(__a, __b, __c) \
  ((poly8x8_t)__builtin_neon_vld1_lanev8qi (__a, __b, __c))

#define vld1_lane_p16(__a, __b, __c) \
  ((poly16x4_t)__builtin_neon_vld1_lanev4hi (__a, __b, __c))

#define vld1_lane_s64(__a, __b, __c) \
  ((int64x1_t)__builtin_neon_vld1_lanev1di (__a, __b, __c))

#define vld1_lane_u64(__a, __b, __c) \
  ((uint64x1_t)__builtin_neon_vld1_lanev1di (__a, __b, __c))
/* vld1q_lane (128-bit vectors): forward the three arguments unchanged to
   __builtin_neon_vld1_lane<mode> and cast the result to the public vector
   type.  Variants with the same lane layout share one builtin and differ
   only in the cast.  Each expansion is parenthesized so it stays a single
   operand at the call site.  */
#define vld1q_lane_s8(__a, __b, __c) \
  ((int8x16_t)__builtin_neon_vld1_lanev16qi (__a, __b, __c))

#define vld1q_lane_s16(__a, __b, __c) \
  ((int16x8_t)__builtin_neon_vld1_lanev8hi (__a, __b, __c))

#define vld1q_lane_s32(__a, __b, __c) \
  ((int32x4_t)__builtin_neon_vld1_lanev4si (__a, __b, __c))

#define vld1q_lane_f32(__a, __b, __c) \
  ((float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c))

#define vld1q_lane_u8(__a, __b, __c) \
  ((uint8x16_t)__builtin_neon_vld1_lanev16qi (__a, __b, __c))

#define vld1q_lane_u16(__a, __b, __c) \
  ((uint16x8_t)__builtin_neon_vld1_lanev8hi (__a, __b, __c))

#define vld1q_lane_u32(__a, __b, __c) \
  ((uint32x4_t)__builtin_neon_vld1_lanev4si (__a, __b, __c))

#define vld1q_lane_p8(__a, __b, __c) \
  ((poly8x16_t)__builtin_neon_vld1_lanev16qi (__a, __b, __c))

#define vld1q_lane_p16(__a, __b, __c) \
  ((poly16x8_t)__builtin_neon_vld1_lanev8hi (__a, __b, __c))

#define vld1q_lane_s64(__a, __b, __c) \
  ((int64x2_t)__builtin_neon_vld1_lanev2di (__a, __b, __c))

#define vld1q_lane_u64(__a, __b, __c) \
  ((uint64x2_t)__builtin_neon_vld1_lanev2di (__a, __b, __c))
/* vld1_dup (64-bit vectors): pass __a unchanged to
   __builtin_neon_vld1_dup<mode> and cast the result to the public vector
   type.  Variants with the same lane layout share one builtin and differ
   only in the cast.  Each expansion is parenthesized so it stays a single
   operand at the call site.  */
#define vld1_dup_s8(__a) \
  ((int8x8_t)__builtin_neon_vld1_dupv8qi (__a))

#define vld1_dup_s16(__a) \
  ((int16x4_t)__builtin_neon_vld1_dupv4hi (__a))

#define vld1_dup_s32(__a) \
  ((int32x2_t)__builtin_neon_vld1_dupv2si (__a))

#define vld1_dup_f32(__a) \
  ((float32x2_t)__builtin_neon_vld1_dupv2sf (__a))

#define vld1_dup_u8(__a) \
  ((uint8x8_t)__builtin_neon_vld1_dupv8qi (__a))

#define vld1_dup_u16(__a) \
  ((uint16x4_t)__builtin_neon_vld1_dupv4hi (__a))

#define vld1_dup_u32(__a) \
  ((uint32x2_t)__builtin_neon_vld1_dupv2si (__a))

#define vld1_dup_p8(__a) \
  ((poly8x8_t)__builtin_neon_vld1_dupv8qi (__a))

#define vld1_dup_p16(__a) \
  ((poly16x4_t)__builtin_neon_vld1_dupv4hi (__a))

#define vld1_dup_s64(__a) \
  ((int64x1_t)__builtin_neon_vld1_dupv1di (__a))

#define vld1_dup_u64(__a) \
  ((uint64x1_t)__builtin_neon_vld1_dupv1di (__a))
/* vld1q_dup (128-bit vectors): pass __a unchanged to
   __builtin_neon_vld1_dup<mode> and cast the result to the public vector
   type.  Variants with the same lane layout share one builtin and differ
   only in the cast.  Each expansion is parenthesized so it stays a single
   operand at the call site.  */
#define vld1q_dup_s8(__a) \
  ((int8x16_t)__builtin_neon_vld1_dupv16qi (__a))

#define vld1q_dup_s16(__a) \
  ((int16x8_t)__builtin_neon_vld1_dupv8hi (__a))

#define vld1q_dup_s32(__a) \
  ((int32x4_t)__builtin_neon_vld1_dupv4si (__a))

#define vld1q_dup_f32(__a) \
  ((float32x4_t)__builtin_neon_vld1_dupv4sf (__a))

#define vld1q_dup_u8(__a) \
  ((uint8x16_t)__builtin_neon_vld1_dupv16qi (__a))

#define vld1q_dup_u16(__a) \
  ((uint16x8_t)__builtin_neon_vld1_dupv8hi (__a))

#define vld1q_dup_u32(__a) \
  ((uint32x4_t)__builtin_neon_vld1_dupv4si (__a))

#define vld1q_dup_p8(__a) \
  ((poly8x16_t)__builtin_neon_vld1_dupv16qi (__a))

#define vld1q_dup_p16(__a) \
  ((poly16x8_t)__builtin_neon_vld1_dupv8hi (__a))

#define vld1q_dup_s64(__a) \
  ((int64x2_t)__builtin_neon_vld1_dupv2di (__a))

#define vld1q_dup_u64(__a) \
  ((uint64x2_t)__builtin_neon_vld1_dupv2di (__a))
/* vst1_<type> and vst1q_<type>: forward (__a, __b) unchanged to the
   __builtin_neon_vst1* builtin selected by the element layout.  Signed,
   unsigned and polynomial variants of one layout share a builtin.  No cast
   is applied; the macro's value is whatever the builtin call yields.
   NOTE(review): by name, __a is the destination pointer and __b the vector
   to store -- confirm against the builtin definitions.  */
#define vst1_s8(__a, __b) \
  __builtin_neon_vst1v8qi (__a, __b)
#define vst1_s16(__a, __b) \
  __builtin_neon_vst1v4hi (__a, __b)
#define vst1_s32(__a, __b) \
  __builtin_neon_vst1v2si (__a, __b)
#define vst1_s64(__a, __b) \
  __builtin_neon_vst1v1di (__a, __b)
#define vst1_f32(__a, __b) \
  __builtin_neon_vst1v2sf (__a, __b)
#define vst1_u8(__a, __b) \
  __builtin_neon_vst1v8qi (__a, __b)
#define vst1_u16(__a, __b) \
  __builtin_neon_vst1v4hi (__a, __b)
#define vst1_u32(__a, __b) \
  __builtin_neon_vst1v2si (__a, __b)
#define vst1_u64(__a, __b) \
  __builtin_neon_vst1v1di (__a, __b)
#define vst1_p8(__a, __b) \
  __builtin_neon_vst1v8qi (__a, __b)
#define vst1_p16(__a, __b) \
  __builtin_neon_vst1v4hi (__a, __b)
#define vst1q_s8(__a, __b) \
  __builtin_neon_vst1v16qi (__a, __b)
#define vst1q_s16(__a, __b) \
  __builtin_neon_vst1v8hi (__a, __b)
#define vst1q_s32(__a, __b) \
  __builtin_neon_vst1v4si (__a, __b)
#define vst1q_s64(__a, __b) \
  __builtin_neon_vst1v2di (__a, __b)
#define vst1q_f32(__a, __b) \
  __builtin_neon_vst1v4sf (__a, __b)
#define vst1q_u8(__a, __b) \
  __builtin_neon_vst1v16qi (__a, __b)
#define vst1q_u16(__a, __b) \
  __builtin_neon_vst1v8hi (__a, __b)
#define vst1q_u32(__a, __b) \
  __builtin_neon_vst1v4si (__a, __b)
#define vst1q_u64(__a, __b) \
  __builtin_neon_vst1v2di (__a, __b)
#define vst1q_p8(__a, __b) \
  __builtin_neon_vst1v16qi (__a, __b)
#define vst1q_p16(__a, __b) \
  __builtin_neon_vst1v8hi (__a, __b)
/* vst1_lane_<type> and vst1q_lane_<type>: forward (__a, __b, __c) unchanged
   to the __builtin_neon_vst1_lane* builtin selected by the element layout.
   Signed, unsigned and polynomial variants of one layout share a builtin.
   No cast is applied; the macro's value is whatever the builtin call yields.
   NOTE(review): by name, __a is the destination pointer, __b the source
   vector and __c the lane index -- confirm against the builtin
   definitions.  */
#define vst1_lane_s8(__a, __b, __c) \
  __builtin_neon_vst1_lanev8qi (__a, __b, __c)
#define vst1_lane_s16(__a, __b, __c) \
  __builtin_neon_vst1_lanev4hi (__a, __b, __c)
#define vst1_lane_s32(__a, __b, __c) \
  __builtin_neon_vst1_lanev2si (__a, __b, __c)
#define vst1_lane_f32(__a, __b, __c) \
  __builtin_neon_vst1_lanev2sf (__a, __b, __c)
#define vst1_lane_u8(__a, __b, __c) \
  __builtin_neon_vst1_lanev8qi (__a, __b, __c)
#define vst1_lane_u16(__a, __b, __c) \
  __builtin_neon_vst1_lanev4hi (__a, __b, __c)
#define vst1_lane_u32(__a, __b, __c) \
  __builtin_neon_vst1_lanev2si (__a, __b, __c)
#define vst1_lane_p8(__a, __b, __c) \
  __builtin_neon_vst1_lanev8qi (__a, __b, __c)
#define vst1_lane_p16(__a, __b, __c) \
  __builtin_neon_vst1_lanev4hi (__a, __b, __c)
#define vst1_lane_s64(__a, __b, __c) \
  __builtin_neon_vst1_lanev1di (__a, __b, __c)
#define vst1_lane_u64(__a, __b, __c) \
  __builtin_neon_vst1_lanev1di (__a, __b, __c)
#define vst1q_lane_s8(__a, __b, __c) \
  __builtin_neon_vst1_lanev16qi (__a, __b, __c)
#define vst1q_lane_s16(__a, __b, __c) \
  __builtin_neon_vst1_lanev8hi (__a, __b, __c)
#define vst1q_lane_s32(__a, __b, __c) \
  __builtin_neon_vst1_lanev4si (__a, __b, __c)
#define vst1q_lane_f32(__a, __b, __c) \
  __builtin_neon_vst1_lanev4sf (__a, __b, __c)
#define vst1q_lane_u8(__a, __b, __c) \
  __builtin_neon_vst1_lanev16qi (__a, __b, __c)
#define vst1q_lane_u16(__a, __b, __c) \
  __builtin_neon_vst1_lanev8hi (__a, __b, __c)
#define vst1q_lane_u32(__a, __b, __c) \
  __builtin_neon_vst1_lanev4si (__a, __b, __c)
#define vst1q_lane_p8(__a, __b, __c) \
  __builtin_neon_vst1_lanev16qi (__a, __b, __c)
#define vst1q_lane_p16(__a, __b, __c) \
  __builtin_neon_vst1_lanev8hi (__a, __b, __c)
#define vst1q_lane_s64(__a, __b, __c) \
  __builtin_neon_vst1_lanev2di (__a, __b, __c)
#define vst1q_lane_u64(__a, __b, __c) \
  __builtin_neon_vst1_lanev2di (__a, __b, __c)
/* vld2_<type> and vld2q_<type>: call the __builtin_neon_vld2* builtin
   selected by the element layout with __a, and yield the result as the
   public two-vector struct type.  The builtin returns an opaque
   __builtin_neon_*2 type, so the result is written to the `__o' member of a
   local union and read back through the `__i' member of the public type.
   The body is a GNU statement expression, `({ ... })', whose value is the
   final `__rv.__i' expression.
   NOTE(review): by name, __a is the source pointer of a 2-way interleaved
   load -- confirm against the builtin definitions.  */
#define vld2_s8(__a) ({ \
    union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v8qi (__a); \
    __rv.__i; })
#define vld2_s16(__a) ({ \
    union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v4hi (__a); \
    __rv.__i; })
#define vld2_s32(__a) ({ \
    union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v2si (__a); \
    __rv.__i; })
#define vld2_f32(__a) ({ \
    union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v2sf (__a); \
    __rv.__i; })
#define vld2_u8(__a) ({ \
    union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v8qi (__a); \
    __rv.__i; })
#define vld2_u16(__a) ({ \
    union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v4hi (__a); \
    __rv.__i; })
#define vld2_u32(__a) ({ \
    union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v2si (__a); \
    __rv.__i; })
#define vld2_p8(__a) ({ \
    union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v8qi (__a); \
    __rv.__i; })
#define vld2_p16(__a) ({ \
    union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v4hi (__a); \
    __rv.__i; })
#define vld2_s64(__a) ({ \
    union { int64x1x2_t __i; __builtin_neon_v1di2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v1di (__a); \
    __rv.__i; })
#define vld2_u64(__a) ({ \
    union { uint64x1x2_t __i; __builtin_neon_v1di2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v1di (__a); \
    __rv.__i; })
#define vld2q_s8(__a) ({ \
    union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v16qi (__a); \
    __rv.__i; })
#define vld2q_s16(__a) ({ \
    union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v8hi (__a); \
    __rv.__i; })
#define vld2q_s32(__a) ({ \
    union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v4si (__a); \
    __rv.__i; })
#define vld2q_f32(__a) ({ \
    union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v4sf (__a); \
    __rv.__i; })
#define vld2q_u8(__a) ({ \
    union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v16qi (__a); \
    __rv.__i; })
#define vld2q_u16(__a) ({ \
    union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v8hi (__a); \
    __rv.__i; })
#define vld2q_u32(__a) ({ \
    union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v4si (__a); \
    __rv.__i; })
#define vld2q_p8(__a) ({ \
    union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v16qi (__a); \
    __rv.__i; })
#define vld2q_p16(__a) ({ \
    union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2v8hi (__a); \
    __rv.__i; })
/* vld2_lane_<type> and vld2q_lane_<type>: call the __builtin_neon_vld2_lane*
   builtin selected by the element layout and yield the result as the public
   two-vector struct type.  Two local unions convert between the public
   struct (`__i') and the builtin's opaque __builtin_neon_*2 type (`__o'):
   `__bu' is initialized from __b and passed as `__bu.__o'; `__rv' receives
   the builtin result in `__o' and is read back through `__i'.  The body is a
   GNU statement expression, `({ ... })', whose value is `__rv.__i'.
   NOTE(review): by name, __a is the source pointer, __b the struct being
   updated and __c the lane index -- confirm against the builtin
   definitions.  */
#define vld2_lane_s8(__a, __b, __c) ({ \
    union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
    union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev8qi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2_lane_s16(__a, __b, __c) ({ \
    union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
    union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev4hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2_lane_s32(__a, __b, __c) ({ \
    union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
    union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev2si (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2_lane_f32(__a, __b, __c) ({ \
    union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __bu = { __b }; \
    union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2_lane_u8(__a, __b, __c) ({ \
    union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
    union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev8qi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2_lane_u16(__a, __b, __c) ({ \
    union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
    union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev4hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2_lane_u32(__a, __b, __c) ({ \
    union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
    union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev2si (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2_lane_p8(__a, __b, __c) ({ \
    union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
    union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev8qi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2_lane_p16(__a, __b, __c) ({ \
    union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
    union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev4hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2q_lane_s16(__a, __b, __c) ({ \
    union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
    union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev8hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2q_lane_s32(__a, __b, __c) ({ \
    union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
    union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev4si (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2q_lane_f32(__a, __b, __c) ({ \
    union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __bu = { __b }; \
    union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2q_lane_u16(__a, __b, __c) ({ \
    union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
    union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev8hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2q_lane_u32(__a, __b, __c) ({ \
    union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
    union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev4si (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld2q_lane_p16(__a, __b, __c) ({ \
    union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
    union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_lanev8hi (__a, __bu.__o, __c); \
    __rv.__i; })
/* vld2_dup_<type>: call the __builtin_neon_vld2_dup* builtin selected by the
   element layout with __a, and yield the result as the public two-vector
   struct type.  The builtin result is written to the `__o' member of a
   local union and read back through the `__i' member of the public type.
   The body is a GNU statement expression, `({ ... })', whose value is
   `__rv.__i'.
   NOTE(review): by name, each of the two result vectors has one loaded
   element replicated across its lanes -- confirm against the builtin
   definitions.  */
#define vld2_dup_s8(__a) ({ \
    union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv8qi (__a); \
    __rv.__i; })
#define vld2_dup_s16(__a) ({ \
    union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv4hi (__a); \
    __rv.__i; })
#define vld2_dup_s32(__a) ({ \
    union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv2si (__a); \
    __rv.__i; })
#define vld2_dup_f32(__a) ({ \
    union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv2sf (__a); \
    __rv.__i; })
#define vld2_dup_u8(__a) ({ \
    union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv8qi (__a); \
    __rv.__i; })
#define vld2_dup_u16(__a) ({ \
    union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv4hi (__a); \
    __rv.__i; })
#define vld2_dup_u32(__a) ({ \
    union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv2si (__a); \
    __rv.__i; })
#define vld2_dup_p8(__a) ({ \
    union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv8qi (__a); \
    __rv.__i; })
#define vld2_dup_p16(__a) ({ \
    union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv4hi (__a); \
    __rv.__i; })
#define vld2_dup_s64(__a) ({ \
    union { int64x1x2_t __i; __builtin_neon_v1di2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv1di (__a); \
    __rv.__i; })
#define vld2_dup_u64(__a) ({ \
    union { uint64x1x2_t __i; __builtin_neon_v1di2 __o; } __rv; \
    __rv.__o = __builtin_neon_vld2_dupv1di (__a); \
    __rv.__i; })
/* vst2_<type> and vst2q_<type>: call the __builtin_neon_vst2* builtin
   selected by the element layout.  The public two-vector struct __b is
   copied into the `__i' member of a local union so it can be passed to the
   builtin as the opaque __builtin_neon_*2 type through the `__o' member.
   The body is a GNU statement expression, `({ ... })', whose last statement
   is the builtin call.
   NOTE(review): by name, __a is the destination pointer of a 2-way
   interleaved store -- confirm against the builtin definitions.  */
#define vst2_s8(__a, __b) ({ \
    union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v8qi (__a, __bu.__o); })
#define vst2_s16(__a, __b) ({ \
    union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v4hi (__a, __bu.__o); })
#define vst2_s32(__a, __b) ({ \
    union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v2si (__a, __bu.__o); })
#define vst2_f32(__a, __b) ({ \
    union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v2sf (__a, __bu.__o); })
#define vst2_u8(__a, __b) ({ \
    union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v8qi (__a, __bu.__o); })
#define vst2_u16(__a, __b) ({ \
    union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v4hi (__a, __bu.__o); })
#define vst2_u32(__a, __b) ({ \
    union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v2si (__a, __bu.__o); })
#define vst2_p8(__a, __b) ({ \
    union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v8qi (__a, __bu.__o); })
#define vst2_p16(__a, __b) ({ \
    union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v4hi (__a, __bu.__o); })
#define vst2_s64(__a, __b) ({ \
    union { int64x1x2_t __i; __builtin_neon_v1di2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v1di (__a, __bu.__o); })
#define vst2_u64(__a, __b) ({ \
    union { uint64x1x2_t __i; __builtin_neon_v1di2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v1di (__a, __bu.__o); })
#define vst2q_s8(__a, __b) ({ \
    union { int8x16x2_t __i; __builtin_neon_v16qi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v16qi (__a, __bu.__o); })
#define vst2q_s16(__a, __b) ({ \
    union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v8hi (__a, __bu.__o); })
#define vst2q_s32(__a, __b) ({ \
    union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v4si (__a, __bu.__o); })
#define vst2q_f32(__a, __b) ({ \
    union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v4sf (__a, __bu.__o); })
#define vst2q_u8(__a, __b) ({ \
    union { uint8x16x2_t __i; __builtin_neon_v16qi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v16qi (__a, __bu.__o); })
#define vst2q_u16(__a, __b) ({ \
    union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v8hi (__a, __bu.__o); })
#define vst2q_u32(__a, __b) ({ \
    union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v4si (__a, __bu.__o); })
#define vst2q_p8(__a, __b) ({ \
    union { poly8x16x2_t __i; __builtin_neon_v16qi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v16qi (__a, __bu.__o); })
#define vst2q_p16(__a, __b) ({ \
    union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2v8hi (__a, __bu.__o); })
/* vst2_lane_<type> and vst2q_lane_<type>: call the __builtin_neon_vst2_lane*
   builtin selected by the element layout.  The public two-vector struct __b
   is copied into the `__i' member of a local union so it can be passed to
   the builtin as the opaque __builtin_neon_*2 type through `__o'; __a and
   __c are forwarded unchanged.  The body is a GNU statement expression,
   `({ ... })', whose last statement is the builtin call.
   NOTE(review): by name, __a is the destination pointer and __c the lane
   index -- confirm against the builtin definitions.  */
#define vst2_lane_s8(__a, __b, __c) ({ \
    union { int8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev8qi (__a, __bu.__o, __c); })
#define vst2_lane_s16(__a, __b, __c) ({ \
    union { int16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev4hi (__a, __bu.__o, __c); })
#define vst2_lane_s32(__a, __b, __c) ({ \
    union { int32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev2si (__a, __bu.__o, __c); })
#define vst2_lane_f32(__a, __b, __c) ({ \
    union { float32x2x2_t __i; __builtin_neon_v2sf2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c); })
#define vst2_lane_u8(__a, __b, __c) ({ \
    union { uint8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev8qi (__a, __bu.__o, __c); })
#define vst2_lane_u16(__a, __b, __c) ({ \
    union { uint16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev4hi (__a, __bu.__o, __c); })
#define vst2_lane_u32(__a, __b, __c) ({ \
    union { uint32x2x2_t __i; __builtin_neon_v2si2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev2si (__a, __bu.__o, __c); })
#define vst2_lane_p8(__a, __b, __c) ({ \
    union { poly8x8x2_t __i; __builtin_neon_v8qi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev8qi (__a, __bu.__o, __c); })
#define vst2_lane_p16(__a, __b, __c) ({ \
    union { poly16x4x2_t __i; __builtin_neon_v4hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev4hi (__a, __bu.__o, __c); })
#define vst2q_lane_s16(__a, __b, __c) ({ \
    union { int16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev8hi (__a, __bu.__o, __c); })
#define vst2q_lane_s32(__a, __b, __c) ({ \
    union { int32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev4si (__a, __bu.__o, __c); })
#define vst2q_lane_f32(__a, __b, __c) ({ \
    union { float32x4x2_t __i; __builtin_neon_v4sf2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c); })
#define vst2q_lane_u16(__a, __b, __c) ({ \
    union { uint16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev8hi (__a, __bu.__o, __c); })
#define vst2q_lane_u32(__a, __b, __c) ({ \
    union { uint32x4x2_t __i; __builtin_neon_v4si2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev4si (__a, __bu.__o, __c); })
#define vst2q_lane_p16(__a, __b, __c) ({ \
    union { poly16x8x2_t __i; __builtin_neon_v8hi2 __o; } __bu = { __b }; \
    __builtin_neon_vst2_lanev8hi (__a, __bu.__o, __c); })
/* vld3_<type> and vld3q_<type>: call the __builtin_neon_vld3* builtin
   selected by the element layout with __a, and yield the result as the
   public three-vector struct type.  The builtin returns an opaque
   __builtin_neon_*3 type, so the result is written to the `__o' member of a
   local union and read back through the `__i' member of the public type.
   The body is a GNU statement expression, `({ ... })', whose value is the
   final `__rv.__i' expression.
   NOTE(review): by name, __a is the source pointer of a 3-way interleaved
   load -- confirm against the builtin definitions.  */
#define vld3_s8(__a) ({ \
    union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v8qi (__a); \
    __rv.__i; })
#define vld3_s16(__a) ({ \
    union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v4hi (__a); \
    __rv.__i; })
#define vld3_s32(__a) ({ \
    union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v2si (__a); \
    __rv.__i; })
#define vld3_f32(__a) ({ \
    union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v2sf (__a); \
    __rv.__i; })
#define vld3_u8(__a) ({ \
    union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v8qi (__a); \
    __rv.__i; })
#define vld3_u16(__a) ({ \
    union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v4hi (__a); \
    __rv.__i; })
#define vld3_u32(__a) ({ \
    union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v2si (__a); \
    __rv.__i; })
#define vld3_p8(__a) ({ \
    union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v8qi (__a); \
    __rv.__i; })
#define vld3_p16(__a) ({ \
    union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v4hi (__a); \
    __rv.__i; })
#define vld3_s64(__a) ({ \
    union { int64x1x3_t __i; __builtin_neon_v1di3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v1di (__a); \
    __rv.__i; })
#define vld3_u64(__a) ({ \
    union { uint64x1x3_t __i; __builtin_neon_v1di3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v1di (__a); \
    __rv.__i; })
#define vld3q_s8(__a) ({ \
    union { int8x16x3_t __i; __builtin_neon_v16qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v16qi (__a); \
    __rv.__i; })
#define vld3q_s16(__a) ({ \
    union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v8hi (__a); \
    __rv.__i; })
#define vld3q_s32(__a) ({ \
    union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v4si (__a); \
    __rv.__i; })
#define vld3q_f32(__a) ({ \
    union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v4sf (__a); \
    __rv.__i; })
#define vld3q_u8(__a) ({ \
    union { uint8x16x3_t __i; __builtin_neon_v16qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v16qi (__a); \
    __rv.__i; })
#define vld3q_u16(__a) ({ \
    union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v8hi (__a); \
    __rv.__i; })
#define vld3q_u32(__a) ({ \
    union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v4si (__a); \
    __rv.__i; })
#define vld3q_p8(__a) ({ \
    union { poly8x16x3_t __i; __builtin_neon_v16qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v16qi (__a); \
    __rv.__i; })
#define vld3q_p16(__a) ({ \
    union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3v8hi (__a); \
    __rv.__i; })
/* vld3_lane_<type> and vld3q_lane_<type>: call the __builtin_neon_vld3_lane*
   builtin selected by the element layout and yield the result as the public
   three-vector struct type.  Two local unions convert between the public
   struct (`__i') and the builtin's opaque __builtin_neon_*3 type (`__o'):
   `__bu' is initialized from __b and passed as `__bu.__o'; `__rv' receives
   the builtin result in `__o' and is read back through `__i'.  The body is a
   GNU statement expression, `({ ... })', whose value is `__rv.__i'.
   NOTE(review): by name, __a is the source pointer, __b the struct being
   updated and __c the lane index -- confirm against the builtin
   definitions.  */
#define vld3_lane_s8(__a, __b, __c) ({ \
    union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
    union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev8qi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3_lane_s16(__a, __b, __c) ({ \
    union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
    union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev4hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3_lane_s32(__a, __b, __c) ({ \
    union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
    union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev2si (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3_lane_f32(__a, __b, __c) ({ \
    union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __bu = { __b }; \
    union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3_lane_u8(__a, __b, __c) ({ \
    union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
    union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev8qi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3_lane_u16(__a, __b, __c) ({ \
    union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
    union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev4hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3_lane_u32(__a, __b, __c) ({ \
    union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
    union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev2si (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3_lane_p8(__a, __b, __c) ({ \
    union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
    union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev8qi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3_lane_p16(__a, __b, __c) ({ \
    union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
    union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev4hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3q_lane_s16(__a, __b, __c) ({ \
    union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
    union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev8hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3q_lane_s32(__a, __b, __c) ({ \
    union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
    union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev4si (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3q_lane_f32(__a, __b, __c) ({ \
    union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __bu = { __b }; \
    union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3q_lane_u16(__a, __b, __c) ({ \
    union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
    union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev8hi (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3q_lane_u32(__a, __b, __c) ({ \
    union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
    union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev4si (__a, __bu.__o, __c); \
    __rv.__i; })
#define vld3q_lane_p16(__a, __b, __c) ({ \
    union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
    union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_lanev8hi (__a, __bu.__o, __c); \
    __rv.__i; })
/* vld3_dup_<type>: call the __builtin_neon_vld3_dup* builtin selected by the
   element layout with __a, and yield the result as the public three-vector
   struct type.  The builtin result is written to the `__o' member of a
   local union and read back through the `__i' member of the public type.
   The body is a GNU statement expression, `({ ... })', whose value is
   `__rv.__i'.
   NOTE(review): by name, each of the three result vectors has one loaded
   element replicated across its lanes -- confirm against the builtin
   definitions.  */
#define vld3_dup_s8(__a) ({ \
    union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv8qi (__a); \
    __rv.__i; })
#define vld3_dup_s16(__a) ({ \
    union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv4hi (__a); \
    __rv.__i; })
#define vld3_dup_s32(__a) ({ \
    union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv2si (__a); \
    __rv.__i; })
#define vld3_dup_f32(__a) ({ \
    union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv2sf (__a); \
    __rv.__i; })
#define vld3_dup_u8(__a) ({ \
    union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv8qi (__a); \
    __rv.__i; })
#define vld3_dup_u16(__a) ({ \
    union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv4hi (__a); \
    __rv.__i; })
#define vld3_dup_u32(__a) ({ \
    union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv2si (__a); \
    __rv.__i; })
#define vld3_dup_p8(__a) ({ \
    union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv8qi (__a); \
    __rv.__i; })
#define vld3_dup_p16(__a) ({ \
    union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv4hi (__a); \
    __rv.__i; })
#define vld3_dup_s64(__a) ({ \
    union { int64x1x3_t __i; __builtin_neon_v1di3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv1di (__a); \
    __rv.__i; })
#define vld3_dup_u64(__a) ({ \
    union { uint64x1x3_t __i; __builtin_neon_v1di3 __o; } __rv; \
    __rv.__o = __builtin_neon_vld3_dupv1di (__a); \
    __rv.__i; })
/* vst3_<type> and vst3q_<type>: call the __builtin_neon_vst3* builtin
   selected by the element layout.  The public three-vector struct __b is
   copied into the `__i' member of a local union so it can be passed to the
   builtin as the opaque __builtin_neon_*3 type through the `__o' member.
   The body is a GNU statement expression, `({ ... })', whose last statement
   is the builtin call.
   NOTE(review): by name, __a is the destination pointer of a 3-way
   interleaved store -- confirm against the builtin definitions.  */
#define vst3_s8(__a, __b) ({ \
    union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v8qi (__a, __bu.__o); })
#define vst3_s16(__a, __b) ({ \
    union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v4hi (__a, __bu.__o); })
#define vst3_s32(__a, __b) ({ \
    union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v2si (__a, __bu.__o); })
#define vst3_f32(__a, __b) ({ \
    union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v2sf (__a, __bu.__o); })
#define vst3_u8(__a, __b) ({ \
    union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v8qi (__a, __bu.__o); })
#define vst3_u16(__a, __b) ({ \
    union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v4hi (__a, __bu.__o); })
#define vst3_u32(__a, __b) ({ \
    union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v2si (__a, __bu.__o); })
#define vst3_p8(__a, __b) ({ \
    union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v8qi (__a, __bu.__o); })
#define vst3_p16(__a, __b) ({ \
    union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v4hi (__a, __bu.__o); })
#define vst3_s64(__a, __b) ({ \
    union { int64x1x3_t __i; __builtin_neon_v1di3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v1di (__a, __bu.__o); })
#define vst3_u64(__a, __b) ({ \
    union { uint64x1x3_t __i; __builtin_neon_v1di3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v1di (__a, __bu.__o); })
#define vst3q_s8(__a, __b) ({ \
    union { int8x16x3_t __i; __builtin_neon_v16qi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v16qi (__a, __bu.__o); })
#define vst3q_s16(__a, __b) ({ \
    union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v8hi (__a, __bu.__o); })
#define vst3q_s32(__a, __b) ({ \
    union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v4si (__a, __bu.__o); })
#define vst3q_f32(__a, __b) ({ \
    union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v4sf (__a, __bu.__o); })
#define vst3q_u8(__a, __b) ({ \
    union { uint8x16x3_t __i; __builtin_neon_v16qi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v16qi (__a, __bu.__o); })
#define vst3q_u16(__a, __b) ({ \
    union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v8hi (__a, __bu.__o); })
#define vst3q_u32(__a, __b) ({ \
    union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v4si (__a, __bu.__o); })
#define vst3q_p8(__a, __b) ({ \
    union { poly8x16x3_t __i; __builtin_neon_v16qi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v16qi (__a, __bu.__o); })
#define vst3q_p16(__a, __b) ({ \
    union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3v8hi (__a, __bu.__o); })
/* vst3_lane_<type> and vst3q_lane_<type>: call the __builtin_neon_vst3_lane*
   builtin selected by the element layout.  The public three-vector struct
   __b is copied into the `__i' member of a local union so it can be passed
   to the builtin as the opaque __builtin_neon_*3 type through `__o'; __a and
   __c are forwarded unchanged.  The body is a GNU statement expression,
   `({ ... })', whose last statement is the builtin call.
   NOTE(review): by name, __a is the destination pointer and __c the lane
   index -- confirm against the builtin definitions.  */
#define vst3_lane_s8(__a, __b, __c) ({ \
    union { int8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev8qi (__a, __bu.__o, __c); })
#define vst3_lane_s16(__a, __b, __c) ({ \
    union { int16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev4hi (__a, __bu.__o, __c); })
#define vst3_lane_s32(__a, __b, __c) ({ \
    union { int32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev2si (__a, __bu.__o, __c); })
#define vst3_lane_f32(__a, __b, __c) ({ \
    union { float32x2x3_t __i; __builtin_neon_v2sf3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c); })
#define vst3_lane_u8(__a, __b, __c) ({ \
    union { uint8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev8qi (__a, __bu.__o, __c); })
#define vst3_lane_u16(__a, __b, __c) ({ \
    union { uint16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev4hi (__a, __bu.__o, __c); })
#define vst3_lane_u32(__a, __b, __c) ({ \
    union { uint32x2x3_t __i; __builtin_neon_v2si3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev2si (__a, __bu.__o, __c); })
#define vst3_lane_p8(__a, __b, __c) ({ \
    union { poly8x8x3_t __i; __builtin_neon_v8qi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev8qi (__a, __bu.__o, __c); })
#define vst3_lane_p16(__a, __b, __c) ({ \
    union { poly16x4x3_t __i; __builtin_neon_v4hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev4hi (__a, __bu.__o, __c); })
#define vst3q_lane_s16(__a, __b, __c) ({ \
    union { int16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev8hi (__a, __bu.__o, __c); })
#define vst3q_lane_s32(__a, __b, __c) ({ \
    union { int32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev4si (__a, __bu.__o, __c); })
#define vst3q_lane_f32(__a, __b, __c) ({ \
    union { float32x4x3_t __i; __builtin_neon_v4sf3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c); })
#define vst3q_lane_u16(__a, __b, __c) ({ \
    union { uint16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev8hi (__a, __bu.__o, __c); })
#define vst3q_lane_u32(__a, __b, __c) ({ \
    union { uint32x4x3_t __i; __builtin_neon_v4si3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev4si (__a, __bu.__o, __c); })
#define vst3q_lane_p16(__a, __b, __c) ({ \
    union { poly16x8x3_t __i; __builtin_neon_v8hi3 __o; } __bu = { __b }; \
    __builtin_neon_vst3_lanev8hi (__a, __bu.__o, __c); })
/* vld4_<type>, 64-bit-vector forms.  Each macro calls
   __builtin_neon_vld4<mode> (__a), stores the opaque
   __builtin_neon_<mode>4 result in the __o member of a union and yields the
   __i member, i.e. the same bits typed as the public <type>x4_t aggregate.
   The GNU statement expression keeps __rv local and makes __rv.__i the
   value of the macro.
   NOTE(review): __a is presumably the address to load from -- confirm
   against the builtin's definition.  */
#define vld4_s8(__a) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8qi (__a); \
     __rv.__i; \
   })

#define vld4_s16(__a) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4hi (__a); \
     __rv.__i; \
   })

#define vld4_s32(__a) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v2si (__a); \
     __rv.__i; \
   })

#define vld4_f32(__a) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v2sf (__a); \
     __rv.__i; \
   })

#define vld4_u8(__a) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8qi (__a); \
     __rv.__i; \
   })

#define vld4_u16(__a) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4hi (__a); \
     __rv.__i; \
   })

#define vld4_u32(__a) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v2si (__a); \
     __rv.__i; \
   })

#define vld4_p8(__a) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8qi (__a); \
     __rv.__i; \
   })

#define vld4_p16(__a) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4hi (__a); \
     __rv.__i; \
   })

#define vld4_s64(__a) \
  ({ \
     union { int64x1x4_t __i; __builtin_neon_v1di4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v1di (__a); \
     __rv.__i; \
   })

#define vld4_u64(__a) \
  ({ \
     union { uint64x1x4_t __i; __builtin_neon_v1di4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v1di (__a); \
     __rv.__i; \
   })

/* vld4q_<type>, 128-bit-vector forms.  Each macro assigns the result of
   __builtin_neon_vld4<mode> (__a) to the opaque __o member of a union and
   yields the __i member, so the caller receives the public <type>x4_t
   aggregate.  Written as a GNU statement expression whose value is
   __rv.__i.  */
#define vld4q_s8(__a) \
  ({ \
     union { int8x16x4_t __i; __builtin_neon_v16qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v16qi (__a); \
     __rv.__i; \
   })

#define vld4q_s16(__a) \
  ({ \
     union { int16x8x4_t __i; __builtin_neon_v8hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8hi (__a); \
     __rv.__i; \
   })

#define vld4q_s32(__a) \
  ({ \
     union { int32x4x4_t __i; __builtin_neon_v4si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4si (__a); \
     __rv.__i; \
   })

#define vld4q_f32(__a) \
  ({ \
     union { float32x4x4_t __i; __builtin_neon_v4sf4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4sf (__a); \
     __rv.__i; \
   })

#define vld4q_u8(__a) \
  ({ \
     union { uint8x16x4_t __i; __builtin_neon_v16qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v16qi (__a); \
     __rv.__i; \
   })

#define vld4q_u16(__a) \
  ({ \
     union { uint16x8x4_t __i; __builtin_neon_v8hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8hi (__a); \
     __rv.__i; \
   })

#define vld4q_u32(__a) \
  ({ \
     union { uint32x4x4_t __i; __builtin_neon_v4si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v4si (__a); \
     __rv.__i; \
   })

#define vld4q_p8(__a) \
  ({ \
     union { poly8x16x4_t __i; __builtin_neon_v16qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v16qi (__a); \
     __rv.__i; \
   })

#define vld4q_p16(__a) \
  ({ \
     union { poly16x8x4_t __i; __builtin_neon_v8hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4v8hi (__a); \
     __rv.__i; \
   })

/* vld4_lane_<type>, 64-bit-vector forms.  __b (an existing <type>x4_t
   aggregate) is copied into the union __bu and read back as the opaque
   __builtin_neon_<mode>4 type; __builtin_neon_vld4_lane<mode> is then
   called with __a, that value and __c.  Its result goes into the __o member
   of a second union, __rv, whose __i member is the value of the statement
   expression.
   NOTE(review): presumably only lane __c of each vector is replaced with
   data read through __a -- confirm against the builtin's definition.  */
#define vld4_lane_s8(__a, __b, __c) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev2si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __bu = { __b }; \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev2si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8qi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

/* vld4q_lane_<type>, 128-bit-vector forms.  __b is converted to the opaque
   __builtin_neon_<mode>4 type through the union __bu, passed together with
   __a and __c to __builtin_neon_vld4_lane<mode>, and the builtin's result
   is converted back to the public <type>x4_t through the union __rv.  The
   value of the statement expression is __rv.__i.  */
#define vld4q_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     union { int16x8x4_t __i; __builtin_neon_v8hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     union { int32x4x4_t __i; __builtin_neon_v4si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x4x4_t __i; __builtin_neon_v4sf4 __o; } __bu = { __b }; \
     union { float32x4x4_t __i; __builtin_neon_v4sf4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     union { uint16x8x4_t __i; __builtin_neon_v8hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     union { uint32x4x4_t __i; __builtin_neon_v4si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev4si (__a, __bu.__o, __c); \
     __rv.__i; \
   })

#define vld4q_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     union { poly16x8x4_t __i; __builtin_neon_v8hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_lanev8hi (__a, __bu.__o, __c); \
     __rv.__i; \
   })

/* vld4_dup_<type>.  Each macro stores the result of
   __builtin_neon_vld4_dup<mode> (__a) in the opaque __o member of a union
   and yields the __i member, i.e. the public <type>x4_t view of the same
   bits.  The statement expression's value is __rv.__i.
   NOTE(review): the "_dup" builtins presumably replicate the loaded
   elements across all lanes -- confirm against the builtin's definition.  */
#define vld4_dup_s8(__a) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld4_dup_s16(__a) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld4_dup_s32(__a) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv2si (__a); \
     __rv.__i; \
   })

#define vld4_dup_f32(__a) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv2sf (__a); \
     __rv.__i; \
   })

#define vld4_dup_u8(__a) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld4_dup_u16(__a) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld4_dup_u32(__a) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv2si (__a); \
     __rv.__i; \
   })

#define vld4_dup_p8(__a) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv8qi (__a); \
     __rv.__i; \
   })

#define vld4_dup_p16(__a) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv4hi (__a); \
     __rv.__i; \
   })

#define vld4_dup_s64(__a) \
  ({ \
     union { int64x1x4_t __i; __builtin_neon_v1di4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv1di (__a); \
     __rv.__i; \
   })

#define vld4_dup_u64(__a) \
  ({ \
     union { uint64x1x4_t __i; __builtin_neon_v1di4 __o; } __rv; \
     __rv.__o = __builtin_neon_vld4_dupv1di (__a); \
     __rv.__i; \
   })

/* vst4_<type>, 64-bit-vector forms.  The four-vector aggregate __b is
   copied into a union, read back as the opaque __builtin_neon_<mode>4 type
   and passed with __a to __builtin_neon_vst4<mode>.  Each body is a GNU
   statement expression, so the macro is one expression and yields no value.
   NOTE(review): __a is presumably the destination pointer -- confirm
   against the builtin's definition.  */
#define vst4_s8(__a, __b) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8qi (__a, __bu.__o); \
   })

#define vst4_s16(__a, __b) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4hi (__a, __bu.__o); \
   })

#define vst4_s32(__a, __b) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v2si (__a, __bu.__o); \
   })

#define vst4_f32(__a, __b) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v2sf (__a, __bu.__o); \
   })

#define vst4_u8(__a, __b) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8qi (__a, __bu.__o); \
   })

#define vst4_u16(__a, __b) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4hi (__a, __bu.__o); \
   })

#define vst4_u32(__a, __b) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v2si (__a, __bu.__o); \
   })

#define vst4_p8(__a, __b) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8qi (__a, __bu.__o); \
   })

#define vst4_p16(__a, __b) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4hi (__a, __bu.__o); \
   })

#define vst4_s64(__a, __b) \
  ({ \
     union { int64x1x4_t __i; __builtin_neon_v1di4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v1di (__a, __bu.__o); \
   })

#define vst4_u64(__a, __b) \
  ({ \
     union { uint64x1x4_t __i; __builtin_neon_v1di4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v1di (__a, __bu.__o); \
   })

/* vst4q_<type>, 128-bit-vector forms.  __b is converted to the opaque
   __builtin_neon_<mode>4 type through a union and passed with __a to
   __builtin_neon_vst4<mode>.  Each body is a GNU statement expression that
   yields no value.  */
#define vst4q_s8(__a, __b) \
  ({ \
     union { int8x16x4_t __i; __builtin_neon_v16qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v16qi (__a, __bu.__o); \
   })

#define vst4q_s16(__a, __b) \
  ({ \
     union { int16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8hi (__a, __bu.__o); \
   })

#define vst4q_s32(__a, __b) \
  ({ \
     union { int32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4si (__a, __bu.__o); \
   })

#define vst4q_f32(__a, __b) \
  ({ \
     union { float32x4x4_t __i; __builtin_neon_v4sf4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4sf (__a, __bu.__o); \
   })

#define vst4q_u8(__a, __b) \
  ({ \
     union { uint8x16x4_t __i; __builtin_neon_v16qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v16qi (__a, __bu.__o); \
   })

#define vst4q_u16(__a, __b) \
  ({ \
     union { uint16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8hi (__a, __bu.__o); \
   })

#define vst4q_u32(__a, __b) \
  ({ \
     union { uint32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v4si (__a, __bu.__o); \
   })

#define vst4q_p8(__a, __b) \
  ({ \
     union { poly8x16x4_t __i; __builtin_neon_v16qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v16qi (__a, __bu.__o); \
   })

#define vst4q_p16(__a, __b) \
  ({ \
     union { poly16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4v8hi (__a, __bu.__o); \
   })

/* vst4_lane_<type>, 64-bit-vector forms.  __b is copied into a union, read
   back as the opaque __builtin_neon_<mode>4 type and passed with __a and
   __c to __builtin_neon_vst4_lane<mode>.  Each body is a GNU statement
   expression that yields no value.
   NOTE(review): __a is presumably the destination pointer and __c the lane
   index (inferred from the builtin name) -- confirm against the builtin.  */
#define vst4_lane_s8(__a, __b, __c) \
  ({ \
     union { int8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst4_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4hi (__a, __bu.__o, __c); \
   })

#define vst4_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev2si (__a, __bu.__o, __c); \
   })

#define vst4_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x2x4_t __i; __builtin_neon_v2sf4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c); \
   })

#define vst4_lane_u8(__a, __b, __c) \
  ({ \
     union { uint8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst4_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4hi (__a, __bu.__o, __c); \
   })

#define vst4_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x2x4_t __i; __builtin_neon_v2si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev2si (__a, __bu.__o, __c); \
   })

#define vst4_lane_p8(__a, __b, __c) \
  ({ \
     union { poly8x8x4_t __i; __builtin_neon_v8qi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8qi (__a, __bu.__o, __c); \
   })

#define vst4_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x4x4_t __i; __builtin_neon_v4hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4hi (__a, __bu.__o, __c); \
   })

/* vst4q_lane_<type>, 128-bit-vector forms.  __b is converted to the opaque
   __builtin_neon_<mode>4 type through a union and passed with __a and __c
   to __builtin_neon_vst4_lane<mode>.  Each body is a GNU statement
   expression that yields no value.  */
#define vst4q_lane_s16(__a, __b, __c) \
  ({ \
     union { int16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8hi (__a, __bu.__o, __c); \
   })

#define vst4q_lane_s32(__a, __b, __c) \
  ({ \
     union { int32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4si (__a, __bu.__o, __c); \
   })

#define vst4q_lane_f32(__a, __b, __c) \
  ({ \
     union { float32x4x4_t __i; __builtin_neon_v4sf4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c); \
   })

#define vst4q_lane_u16(__a, __b, __c) \
  ({ \
     union { uint16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8hi (__a, __bu.__o, __c); \
   })

#define vst4q_lane_u32(__a, __b, __c) \
  ({ \
     union { uint32x4x4_t __i; __builtin_neon_v4si4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev4si (__a, __bu.__o, __c); \
   })

#define vst4q_lane_p16(__a, __b, __c) \
  ({ \
     union { poly16x8x4_t __i; __builtin_neon_v8hi4 __o; } __bu = { __b }; \
     __builtin_neon_vst4_lanev8hi (__a, __bu.__o, __c); \
   })

/* vand_<type> / vandq_<type>.  Each macro forwards __a and __b to
   __builtin_neon_vand<mode> and casts the result to the public vector type.
   The trailing constant is 1 for the signed variants and 0 for the unsigned
   ones.  The whole expansion is parenthesised so that a postfix operator or
   sizeof applied to the macro acts on the cast result, not on the call.  */
#define vand_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1))

#define vand_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1))

#define vand_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vandv2si (__a, __b, 1))

#define vand_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vandv1di (__a, __b, 1))

#define vand_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vandv8qi (__a, __b, 0))

#define vand_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vandv4hi (__a, __b, 0))

#define vand_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vandv2si (__a, __b, 0))

#define vand_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vandv1di (__a, __b, 0))

#define vandq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1))

#define vandq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1))

#define vandq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vandv4si (__a, __b, 1))

#define vandq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vandv2di (__a, __b, 1))

#define vandq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vandv16qi (__a, __b, 0))

#define vandq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vandv8hi (__a, __b, 0))

#define vandq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vandv4si (__a, __b, 0))

#define vandq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vandv2di (__a, __b, 0))

/* vorr_<type> / vorrq_<type>.  Each macro forwards __a and __b to
   __builtin_neon_vorr<mode> and casts the result to the public vector type.
   The trailing constant is 1 for the signed variants and 0 for the unsigned
   ones.  The whole expansion is parenthesised so that a postfix operator or
   sizeof applied to the macro acts on the cast result, not on the call.  */
#define vorr_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1))

#define vorr_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1))

#define vorr_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1))

#define vorr_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vorrv1di (__a, __b, 1))

#define vorr_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vorrv8qi (__a, __b, 0))

#define vorr_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vorrv4hi (__a, __b, 0))

#define vorr_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vorrv2si (__a, __b, 0))

#define vorr_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vorrv1di (__a, __b, 0))

#define vorrq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1))

#define vorrq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1))

#define vorrq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1))

#define vorrq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1))

#define vorrq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vorrv16qi (__a, __b, 0))

#define vorrq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vorrv8hi (__a, __b, 0))

#define vorrq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vorrv4si (__a, __b, 0))

#define vorrq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vorrv2di (__a, __b, 0))

/* veor_<type> / veorq_<type>.  Each macro forwards __a and __b to
   __builtin_neon_veor<mode> and casts the result to the public vector type.
   The trailing constant is 1 for the signed variants and 0 for the unsigned
   ones.  The whole expansion is parenthesised so that a postfix operator or
   sizeof applied to the macro acts on the cast result, not on the call.  */
#define veor_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1))

#define veor_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1))

#define veor_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_veorv2si (__a, __b, 1))

#define veor_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_veorv1di (__a, __b, 1))

#define veor_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_veorv8qi (__a, __b, 0))

#define veor_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_veorv4hi (__a, __b, 0))

#define veor_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_veorv2si (__a, __b, 0))

#define veor_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_veorv1di (__a, __b, 0))

#define veorq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1))

#define veorq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1))

#define veorq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_veorv4si (__a, __b, 1))

#define veorq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_veorv2di (__a, __b, 1))

#define veorq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_veorv16qi (__a, __b, 0))

#define veorq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_veorv8hi (__a, __b, 0))

#define veorq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_veorv4si (__a, __b, 0))

#define veorq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_veorv2di (__a, __b, 0))

/* vbic_<type> / vbicq_<type>.  Each macro forwards __a and __b to
   __builtin_neon_vbic<mode> and casts the result to the public vector type.
   The trailing constant is 1 for the signed variants and 0 for the unsigned
   ones.  The whole expansion is parenthesised so that a postfix operator or
   sizeof applied to the macro acts on the cast result, not on the call.
   NOTE(review): "bic" presumably means __a AND NOT __b -- confirm against
   the builtin's definition.  */
#define vbic_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1))

#define vbic_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1))

#define vbic_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1))

#define vbic_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vbicv1di (__a, __b, 1))

#define vbic_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vbicv8qi (__a, __b, 0))

#define vbic_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vbicv4hi (__a, __b, 0))

#define vbic_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vbicv2si (__a, __b, 0))

#define vbic_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vbicv1di (__a, __b, 0))

#define vbicq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1))

#define vbicq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1))

#define vbicq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1))

#define vbicq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1))

#define vbicq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vbicv16qi (__a, __b, 0))

#define vbicq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vbicv8hi (__a, __b, 0))

#define vbicq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vbicv4si (__a, __b, 0))

#define vbicq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vbicv2di (__a, __b, 0))

/* vorn_<type> / vornq_<type>.  Each macro forwards __a and __b to
   __builtin_neon_vorn<mode> and casts the result to the public vector type.
   The trailing constant is 1 for the signed variants and 0 for the unsigned
   ones.  The whole expansion is parenthesised so that a postfix operator or
   sizeof applied to the macro acts on the cast result, not on the call.
   NOTE(review): "orn" presumably means __a OR NOT __b -- confirm against
   the builtin's definition.  */
#define vorn_s8(__a, __b) \
  ((int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1))

#define vorn_s16(__a, __b) \
  ((int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1))

#define vorn_s32(__a, __b) \
  ((int32x2_t)__builtin_neon_vornv2si (__a, __b, 1))

#define vorn_s64(__a, __b) \
  ((int64x1_t)__builtin_neon_vornv1di (__a, __b, 1))

#define vorn_u8(__a, __b) \
  ((uint8x8_t)__builtin_neon_vornv8qi (__a, __b, 0))

#define vorn_u16(__a, __b) \
  ((uint16x4_t)__builtin_neon_vornv4hi (__a, __b, 0))

#define vorn_u32(__a, __b) \
  ((uint32x2_t)__builtin_neon_vornv2si (__a, __b, 0))

#define vorn_u64(__a, __b) \
  ((uint64x1_t)__builtin_neon_vornv1di (__a, __b, 0))

#define vornq_s8(__a, __b) \
  ((int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1))

#define vornq_s16(__a, __b) \
  ((int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1))

#define vornq_s32(__a, __b) \
  ((int32x4_t)__builtin_neon_vornv4si (__a, __b, 1))

#define vornq_s64(__a, __b) \
  ((int64x2_t)__builtin_neon_vornv2di (__a, __b, 1))

#define vornq_u8(__a, __b) \
  ((uint8x16_t)__builtin_neon_vornv16qi (__a, __b, 0))

#define vornq_u16(__a, __b) \
  ((uint16x8_t)__builtin_neon_vornv8hi (__a, __b, 0))

#define vornq_u32(__a, __b) \
  ((uint32x4_t)__builtin_neon_vornv4si (__a, __b, 0))

#define vornq_u64(__a, __b) \
  ((uint64x2_t)__builtin_neon_vornv2di (__a, __b, 0))

/* vreinterpret_p8_<src> / vreinterpretq_p8_<src>.  Each macro passes __a to
   __builtin_neon_vreinterpret<dst-mode><src-mode> and casts the result to
   poly8x8_t (64-bit forms) or poly8x16_t (q forms).  Source types that
   share a machine mode (e.g. s8 and u8) use the same builtin.  The whole
   expansion is parenthesised so that a postfix operator or sizeof applied
   to the macro acts on the cast result, not on the call.  */
#define vreinterpret_p8_s8(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a))

#define vreinterpret_p8_s16(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a))

#define vreinterpret_p8_s32(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a))

#define vreinterpret_p8_s64(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv1di (__a))

#define vreinterpret_p8_f32(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a))

#define vreinterpret_p8_u8(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a))

#define vreinterpret_p8_u16(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a))

#define vreinterpret_p8_u32(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a))

#define vreinterpret_p8_u64(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv1di (__a))

#define vreinterpret_p8_p16(__a) \
  ((poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a))

#define vreinterpretq_p8_s8(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a))

#define vreinterpretq_p8_s16(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))

#define vreinterpretq_p8_s32(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a))

#define vreinterpretq_p8_s64(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a))

#define vreinterpretq_p8_f32(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a))

#define vreinterpretq_p8_u8(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a))

#define vreinterpretq_p8_u16(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))

#define vreinterpretq_p8_u32(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a))

#define vreinterpretq_p8_u64(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a))

#define vreinterpretq_p8_p16(__a) \
  ((poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a))

/* vreinterpret_p16_<src> / vreinterpretq_p16_<src>.  Each macro passes __a
   to __builtin_neon_vreinterpret<dst-mode><src-mode> and casts the result
   to poly16x4_t (64-bit forms) or poly16x8_t (q forms).  Source types that
   share a machine mode use the same builtin.  The whole expansion is
   parenthesised so that a postfix operator or sizeof applied to the macro
   acts on the cast result, not on the call.  */
#define vreinterpret_p16_s8(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_p16_s16(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a))

#define vreinterpret_p16_s32(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a))

#define vreinterpret_p16_s64(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a))

#define vreinterpret_p16_f32(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a))

#define vreinterpret_p16_u8(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpret_p16_u16(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a))

#define vreinterpret_p16_u32(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a))

#define vreinterpret_p16_u64(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a))

#define vreinterpret_p16_p8(__a) \
  ((poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a))

#define vreinterpretq_p16_s8(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_p16_s16(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a))

#define vreinterpretq_p16_s32(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a))

#define vreinterpretq_p16_s64(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a))

#define vreinterpretq_p16_f32(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a))

#define vreinterpretq_p16_u8(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

#define vreinterpretq_p16_u16(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a))

#define vreinterpretq_p16_u32(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a))

#define vreinterpretq_p16_u64(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a))

#define vreinterpretq_p16_p8(__a) \
  ((poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a))

/* vreinterpret_f32_<src> / vreinterpretq_f32_<src>.  Each macro passes __a
   to __builtin_neon_vreinterpret<dst-mode><src-mode> and casts the result
   to float32x2_t (64-bit forms) or float32x4_t (q forms).  Source types
   that share a machine mode use the same builtin.  The whole expansion is
   parenthesised so that a postfix operator or sizeof applied to the macro
   acts on the cast result, not on the call.  */
#define vreinterpret_f32_s8(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a))

#define vreinterpret_f32_s16(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a))

#define vreinterpret_f32_s32(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv2si (__a))

#define vreinterpret_f32_s64(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv1di (__a))

#define vreinterpret_f32_u8(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a))

#define vreinterpret_f32_u16(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a))

#define vreinterpret_f32_u32(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv2si (__a))

#define vreinterpret_f32_u64(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv1di (__a))

#define vreinterpret_f32_p8(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a))

#define vreinterpret_f32_p16(__a) \
  ((float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a))

#define vreinterpretq_f32_s8(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a))

#define vreinterpretq_f32_s16(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a))

#define vreinterpretq_f32_s32(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a))

#define vreinterpretq_f32_s64(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a))

#define vreinterpretq_f32_u8(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a))

#define vreinterpretq_f32_u16(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a))

#define vreinterpretq_f32_u32(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a))

#define vreinterpretq_f32_u64(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a))

#define vreinterpretq_f32_p8(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a))

#define vreinterpretq_f32_p16(__a) \
  ((float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a))

/* vreinterpret_s64_<src> / vreinterpretq_s64_<src>.  Each macro passes __a
   to __builtin_neon_vreinterpret<dst-mode><src-mode> and casts the result
   to int64x1_t (64-bit forms) or int64x2_t (q forms).  Source types that
   share a machine mode use the same builtin.  The whole expansion is
   parenthesised so that a postfix operator or sizeof applied to the macro
   acts on the cast result, not on the call.  */
#define vreinterpret_s64_s8(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_s64_s16(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpret_s64_s32(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div2si (__a))

#define vreinterpret_s64_f32(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div2sf (__a))

#define vreinterpret_s64_u8(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_s64_u16(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpret_s64_u32(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div2si (__a))

#define vreinterpret_s64_u64(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div1di (__a))

#define vreinterpret_s64_p8(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_s64_p16(__a) \
  ((int64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpretq_s64_s8(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_s64_s16(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

#define vreinterpretq_s64_s32(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div4si (__a))

#define vreinterpretq_s64_f32(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a))

#define vreinterpretq_s64_u8(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_s64_u16(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

#define vreinterpretq_s64_u32(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div4si (__a))

#define vreinterpretq_s64_u64(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div2di (__a))

#define vreinterpretq_s64_p8(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_s64_p16(__a) \
  ((int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

/* vreinterpret_u64_<src> / vreinterpretq_u64_<src>.  Each macro passes __a
   to __builtin_neon_vreinterpret<dst-mode><src-mode> and casts the result
   to uint64x1_t (64-bit forms) or uint64x2_t (q forms).  Source types that
   share a machine mode use the same builtin.  The whole expansion is
   parenthesised so that a postfix operator or sizeof applied to the macro
   acts on the cast result, not on the call.  */
#define vreinterpret_u64_s8(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_u64_s16(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpret_u64_s32(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div2si (__a))

#define vreinterpret_u64_s64(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div1di (__a))

#define vreinterpret_u64_f32(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div2sf (__a))

#define vreinterpret_u64_u8(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_u64_u16(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpret_u64_u32(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div2si (__a))

#define vreinterpret_u64_p8(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div8qi (__a))

#define vreinterpret_u64_p16(__a) \
  ((uint64x1_t)__builtin_neon_vreinterpretv1div4hi (__a))

#define vreinterpretq_u64_s8(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_u64_s16(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

#define vreinterpretq_u64_s32(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a))

#define vreinterpretq_u64_s64(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div2di (__a))

#define vreinterpretq_u64_f32(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a))

#define vreinterpretq_u64_u8(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_u64_u16(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

#define vreinterpretq_u64_u32(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a))

#define vreinterpretq_u64_p8(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a))

#define vreinterpretq_u64_p16(__a) \
  ((uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a))

/* vreinterpret_s8_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv8qi<mode> builtin named in its body and casts
   the result to int8x8_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpret_s8_s16(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv4hi (__a))
#define vreinterpret_s8_s32(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv2si (__a))
#define vreinterpret_s8_s64(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv1di (__a))
#define vreinterpret_s8_f32(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv2sf (__a))
#define vreinterpret_s8_u8(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv8qi (__a))
#define vreinterpret_s8_u16(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv4hi (__a))
#define vreinterpret_s8_u32(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv2si (__a))
#define vreinterpret_s8_u64(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv1di (__a))
#define vreinterpret_s8_p8(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv8qi (__a))
#define vreinterpret_s8_p16(__a) \
  ((int8x8_t) __builtin_neon_vreinterpretv8qiv4hi (__a))
/* vreinterpretq_s8_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv16qi<mode> builtin named in its body and casts
   the result to int8x16_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpretq_s8_s16(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv8hi (__a))
#define vreinterpretq_s8_s32(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv4si (__a))
#define vreinterpretq_s8_s64(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv2di (__a))
#define vreinterpretq_s8_f32(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv4sf (__a))
#define vreinterpretq_s8_u8(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv16qi (__a))
#define vreinterpretq_s8_u16(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv8hi (__a))
#define vreinterpretq_s8_u32(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv4si (__a))
#define vreinterpretq_s8_u64(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv2di (__a))
#define vreinterpretq_s8_p8(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv16qi (__a))
#define vreinterpretq_s8_p16(__a) \
  ((int8x16_t) __builtin_neon_vreinterpretv16qiv8hi (__a))
/* vreinterpret_s16_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv4hi<mode> builtin named in its body and casts
   the result to int16x4_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpret_s16_s8(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv8qi (__a))
#define vreinterpret_s16_s32(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv2si (__a))
#define vreinterpret_s16_s64(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv1di (__a))
#define vreinterpret_s16_f32(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv2sf (__a))
#define vreinterpret_s16_u8(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv8qi (__a))
#define vreinterpret_s16_u16(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv4hi (__a))
#define vreinterpret_s16_u32(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv2si (__a))
#define vreinterpret_s16_u64(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv1di (__a))
#define vreinterpret_s16_p8(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv8qi (__a))
#define vreinterpret_s16_p16(__a) \
  ((int16x4_t) __builtin_neon_vreinterpretv4hiv4hi (__a))
/* vreinterpretq_s16_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv8hi<mode> builtin named in its body and casts
   the result to int16x8_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpretq_s16_s8(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv16qi (__a))
#define vreinterpretq_s16_s32(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv4si (__a))
#define vreinterpretq_s16_s64(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv2di (__a))
#define vreinterpretq_s16_f32(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv4sf (__a))
#define vreinterpretq_s16_u8(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv16qi (__a))
#define vreinterpretq_s16_u16(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv8hi (__a))
#define vreinterpretq_s16_u32(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv4si (__a))
#define vreinterpretq_s16_u64(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv2di (__a))
#define vreinterpretq_s16_p8(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv16qi (__a))
#define vreinterpretq_s16_p16(__a) \
  ((int16x8_t) __builtin_neon_vreinterpretv8hiv8hi (__a))
/* vreinterpret_s32_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv2si<mode> builtin named in its body and casts
   the result to int32x2_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpret_s32_s8(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv8qi (__a))
#define vreinterpret_s32_s16(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv4hi (__a))
#define vreinterpret_s32_s64(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv1di (__a))
#define vreinterpret_s32_f32(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv2sf (__a))
#define vreinterpret_s32_u8(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv8qi (__a))
#define vreinterpret_s32_u16(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv4hi (__a))
#define vreinterpret_s32_u32(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv2si (__a))
#define vreinterpret_s32_u64(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv1di (__a))
#define vreinterpret_s32_p8(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv8qi (__a))
#define vreinterpret_s32_p16(__a) \
  ((int32x2_t) __builtin_neon_vreinterpretv2siv4hi (__a))
/* vreinterpretq_s32_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv4si<mode> builtin named in its body and casts
   the result to int32x4_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpretq_s32_s8(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv16qi (__a))
#define vreinterpretq_s32_s16(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv8hi (__a))
#define vreinterpretq_s32_s64(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv2di (__a))
#define vreinterpretq_s32_f32(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv4sf (__a))
#define vreinterpretq_s32_u8(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv16qi (__a))
#define vreinterpretq_s32_u16(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv8hi (__a))
#define vreinterpretq_s32_u32(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv4si (__a))
#define vreinterpretq_s32_u64(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv2di (__a))
#define vreinterpretq_s32_p8(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv16qi (__a))
#define vreinterpretq_s32_p16(__a) \
  ((int32x4_t) __builtin_neon_vreinterpretv4siv8hi (__a))
/* vreinterpret_u8_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv8qi<mode> builtin named in its body and casts
   the result to uint8x8_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpret_u8_s8(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv8qi (__a))
#define vreinterpret_u8_s16(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv4hi (__a))
#define vreinterpret_u8_s32(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv2si (__a))
#define vreinterpret_u8_s64(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv1di (__a))
#define vreinterpret_u8_f32(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv2sf (__a))
#define vreinterpret_u8_u16(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv4hi (__a))
#define vreinterpret_u8_u32(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv2si (__a))
#define vreinterpret_u8_u64(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv1di (__a))
#define vreinterpret_u8_p8(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv8qi (__a))
#define vreinterpret_u8_p16(__a) \
  ((uint8x8_t) __builtin_neon_vreinterpretv8qiv4hi (__a))
/* vreinterpretq_u8_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv16qi<mode> builtin named in its body and casts
   the result to uint8x16_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpretq_u8_s8(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv16qi (__a))
#define vreinterpretq_u8_s16(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv8hi (__a))
#define vreinterpretq_u8_s32(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv4si (__a))
#define vreinterpretq_u8_s64(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv2di (__a))
#define vreinterpretq_u8_f32(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv4sf (__a))
#define vreinterpretq_u8_u16(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv8hi (__a))
#define vreinterpretq_u8_u32(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv4si (__a))
#define vreinterpretq_u8_u64(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv2di (__a))
#define vreinterpretq_u8_p8(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv16qi (__a))
#define vreinterpretq_u8_p16(__a) \
  ((uint8x16_t) __builtin_neon_vreinterpretv16qiv8hi (__a))
/* vreinterpret_u16_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv4hi<mode> builtin named in its body and casts
   the result to uint16x4_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpret_u16_s8(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv8qi (__a))
#define vreinterpret_u16_s16(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv4hi (__a))
#define vreinterpret_u16_s32(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv2si (__a))
#define vreinterpret_u16_s64(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv1di (__a))
#define vreinterpret_u16_f32(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv2sf (__a))
#define vreinterpret_u16_u8(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv8qi (__a))
#define vreinterpret_u16_u32(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv2si (__a))
#define vreinterpret_u16_u64(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv1di (__a))
#define vreinterpret_u16_p8(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv8qi (__a))
#define vreinterpret_u16_p16(__a) \
  ((uint16x4_t) __builtin_neon_vreinterpretv4hiv4hi (__a))
/* vreinterpretq_u16_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv8hi<mode> builtin named in its body and casts
   the result to uint16x8_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpretq_u16_s8(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv16qi (__a))
#define vreinterpretq_u16_s16(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv8hi (__a))
#define vreinterpretq_u16_s32(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv4si (__a))
#define vreinterpretq_u16_s64(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv2di (__a))
#define vreinterpretq_u16_f32(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv4sf (__a))
#define vreinterpretq_u16_u8(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv16qi (__a))
#define vreinterpretq_u16_u32(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv4si (__a))
#define vreinterpretq_u16_u64(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv2di (__a))
#define vreinterpretq_u16_p8(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv16qi (__a))
#define vreinterpretq_u16_p16(__a) \
  ((uint16x8_t) __builtin_neon_vreinterpretv8hiv8hi (__a))
/* vreinterpret_u32_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv2si<mode> builtin named in its body and casts
   the result to uint32x2_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpret_u32_s8(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv8qi (__a))
#define vreinterpret_u32_s16(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv4hi (__a))
#define vreinterpret_u32_s32(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv2si (__a))
#define vreinterpret_u32_s64(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv1di (__a))
#define vreinterpret_u32_f32(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv2sf (__a))
#define vreinterpret_u32_u8(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv8qi (__a))
#define vreinterpret_u32_u16(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv4hi (__a))
#define vreinterpret_u32_u64(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv1di (__a))
#define vreinterpret_u32_p8(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv8qi (__a))
#define vreinterpret_u32_p16(__a) \
  ((uint32x2_t) __builtin_neon_vreinterpretv2siv4hi (__a))
/* vreinterpretq_u32_<src> family: each macro passes __a unchanged to the
   __builtin_neon_vreinterpretv4si<mode> builtin named in its body and casts
   the result to uint32x4_t.  The macros do no type checking of __a; the
   <src> suffix only selects which builtin is called.  (Presumably a pure
   bit-pattern reinterpretation -- the builtins are supplied by the compiler
   and are not defined in this file.)
   Every expansion is fully parenthesized so that a postfix operator written
   after the macro call (e.g. a vector subscript) applies to the cast result
   rather than binding to the builtin call ahead of the cast.  */
#define vreinterpretq_u32_s8(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv16qi (__a))
#define vreinterpretq_u32_s16(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv8hi (__a))
#define vreinterpretq_u32_s32(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv4si (__a))
#define vreinterpretq_u32_s64(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv2di (__a))
#define vreinterpretq_u32_f32(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv4sf (__a))
#define vreinterpretq_u32_u8(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv16qi (__a))
#define vreinterpretq_u32_u16(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv8hi (__a))
#define vreinterpretq_u32_u64(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv2di (__a))
#define vreinterpretq_u32_p8(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv16qi (__a))
#define vreinterpretq_u32_p16(__a) \
  ((uint32x4_t) __builtin_neon_vreinterpretv4siv8hi (__a))