3 gcc -o v8crypto v8crypto.c -march=armv8-a -mfpu=crypto-neon-fp-armv8
4 gcc -o v8crypto v8crypto.c -mfpu=crypto-neon-fp-armv8
9 #include <malloc.h> // memalign
10 #include <string.h> // memset
11 #include "tests/malloc.h"
12 #include <math.h> // isnormal
/* Basic scalar type aliases used throughout the tests.  Each name is
   declared exactly once: repeating a typedef is a constraint violation
   before C11 and is redundant in any case. */
typedef unsigned char UChar;
typedef unsigned short int UShort;
typedef unsigned int UInt;
typedef signed int Int;
typedef unsigned long long int ULong;
typedef signed long long int Long;
typedef double Double;

/* Boolean type and its two values. */
typedef unsigned char Bool;
#define False ((Bool)0)
#define True ((Bool)1)
/* Lane-type tags.  TyHF is given the value 1234 and the remaining
   enumerators count upward from there.  NOTE(review): judging by the
   names these stand for half/single/double FP lanes, byte/half/single/
   double integer lanes, and "no lane type" -- confirm; the name of the
   enumeration type is not visible in this fragment. */
32 enum { TyHF
=1234, TySF
, TyDF
, TyB
, TyH
, TyS
, TyD
, TyNONE
}
43 typedef union _V128 V128
;
/* Returns the next pseudo-random byte.  The generator is a linear
   congruential one (seed = 1103515245 * seed + 12345) whose state is a
   function-local static starting at 80021, so the sequence of bytes is
   the same on every run. */
45 static inline UChar
randUChar ( void )
47 static UInt seed
= 80021;
48 seed
= 1103515245 * seed
+ 12345;
/* Bits 17..24 of the updated state form the result. */
49 return (seed
>> 17) & 0xFF;
52 //static ULong randULong ( LaneTy ty )
56 // for (i = 0; i < 8; i++) {
57 // r = (r << 8) | (ULong)(0xFF & randUChar());
62 /* Generates a random V128. Ensures that it contains normalised
63 FP numbers when viewed as either F32x4 or F64x2, so that it is
64 reasonable to use in FP test cases. */
/* Fills all 16 bytes of *v from randUChar() and tests whether the four
   f32 lanes and the two f64 lanes are all normal numbers (isnormal).
   NOTE(review): the statements that act on that test, and the ones that
   update nCalls and nIters, are not visible in this fragment --
   presumably the fill is repeated until the test passes; confirm.
   `ty` is not referenced in the visible lines. */
65 static void randV128 ( /*OUT*/V128
* v
, LaneTy ty
)
/* Counters that persist across calls; they are only reported below. */
67 static UInt nCalls
= 0, nIters
= 0;
72 for (i
= 0; i
< 16; i
++) {
73 v
->u8
[i
] = randUChar();
75 if (isnormal(v
->f32
[0]) && isnormal(v
->f32
[1]) && isnormal(v
->f32
[2])
76 && isnormal(v
->f32
[3]) && isnormal(v
->f64
[0]) && isnormal(v
->f64
[1]))
/* Print the running statistics whenever the low 8 bits of nCalls are
   all zero. */
79 if (0 == (nCalls
& 0xFF))
80 printf("randV128: %u calls, %u iters\n", nCalls
, nIters
);
/* Prints the 16 bytes of *v as two hex digits each, starting with
   u8[15] and ending with u8[0].  The visible lines print no separators
   and no newline. */
83 static void showV128 ( V128
* v
)
86 for (i
= 15; i
>= 0; i
--)
87 printf("%02x", (Int
)v
->u8
[i
]);
90 //static void showBlock ( const char* msg, V128* block, Int nBlock )
93 // printf("%s\n", msg);
94 // for (i = 0; i < nBlock; i++) {
96 // showV128(&block[i]);
102 /* ---------------------------------------------------------------- */
103 /* -- Parameterisable test macros -- */
104 /* ---------------------------------------------------------------- */
/* Runs _action 50 times in a row, using _qq as the loop counter.
   NOTE(review): the lines that open and close the enclosing statement
   are not visible in this fragment. */
106 #define DO50(_action) \
108 Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \
112 /* Generate test_<TESTNAME>( LaneTy ty ), a test that involves two
   vector regs, with no bias as to which is input or output.
   It's OK to use r8 as scratch.

   Each of the ITERS iterations fills block with 0x55 bytes, puts random
   values in block[0..3], and then runs an asm sequence that:
     - zeroes FPSCR,
     - loads q<VECREG1NO> and q<VECREG2NO> from byte offsets 0 and 16
       of the address in %0,
     - stores q<VECREG1NO> and q<VECREG2NO> to byte offsets 32 and 48,
     - saves FPSCR at byte offset 64.
   Afterwards block[0..3] and the FPSCR word read from block[4] are
   printed on one line.
   NOTE(review): presumably %0 is the address of block; the asm operand
   list and the line that would splice INSN in between the loads and
   the stores are not visible in this fragment -- confirm. */
115 #define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
116 __attribute__((noinline)) \
117 static void test_##TESTNAME ( LaneTy ty ) { \
119 for (i = 0; i < ITERS; i++) { \
121 memset(block, 0x55, sizeof(block)); \
122 randV128(&block[0], ty); \
123 randV128(&block[1], ty); \
124 randV128(&block[2], ty); \
125 randV128(&block[3], ty); \
126 __asm__ __volatile__( \
127 "mov r9, #0 ; vmsr fpscr, r9 ; " \
128 "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \
129 "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \
131 "add r9, %0, #32 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \
132 "add r9, %0, #48 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \
133 "vmrs r9, fpscr ; str r9, [%0, #64] " \
135 : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "r8", "r9" \
138 UInt fpscr = 0xFFFFFFFF & block[4].u32[0]; \
139 showV128(&block[0]); printf(" "); \
140 showV128(&block[1]); printf(" "); \
141 showV128(&block[2]); printf(" "); \
142 showV128(&block[3]); printf(" fpscr=%08x\n", fpscr); \
147 /* Generate test_<TESTNAME>( LaneTy ty ), a test that involves three
   vector regs, with no bias as to which is input or output.  It's also
   OK to use r8 as scratch.

   Each of the ITERS iterations fills block with 0x55 bytes, puts random
   values in block[0..5], and then runs an asm sequence that:
     - zeroes FPSCR,
     - loads q<VECREG1NO>, q<VECREG2NO> and q<VECREG3NO> from byte
       offsets 0, 16 and 32 of the address in %0,
     - stores the same three registers to byte offsets 48, 64 and 80,
     - saves FPSCR at byte offset 96.
   Afterwards block[0..5] and the FPSCR word read from block[6] are
   printed on one line.
   NOTE(review): presumably %0 is the address of block; the asm operand
   list and the line that would splice INSN in between the loads and
   the stores are not visible in this fragment -- confirm. */
150 #define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
151 __attribute__((noinline)) \
152 static void test_##TESTNAME ( LaneTy ty ) { \
154 for (i = 0; i < ITERS; i++) { \
156 memset(block, 0x55, sizeof(block)); \
157 randV128(&block[0], ty); \
158 randV128(&block[1], ty); \
159 randV128(&block[2], ty); \
160 randV128(&block[3], ty); \
161 randV128(&block[4], ty); \
162 randV128(&block[5], ty); \
163 __asm__ __volatile__( \
164 "mov r9, #0 ; vmsr fpscr, r9 ; " \
165 "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \
166 "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \
167 "add r9, %0, #32 ; vld1.8 { q"#VECREG3NO" }, [r9] ; " \
169 "add r9, %0, #48 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \
170 "add r9, %0, #64 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \
171 "add r9, %0, #80 ; vst1.8 { q"#VECREG3NO" }, [r9] ; " \
172 "vmrs r9, fpscr ; str r9, [%0, #96] " \
174 : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "q"#VECREG3NO, \
178 UInt fpscr = 0xFFFFFFFF & block[6].u32[0]; \
179 showV128(&block[0]); printf(" "); \
180 showV128(&block[1]); printf(" "); \
181 showV128(&block[2]); printf(" "); \
182 showV128(&block[3]); printf(" "); \
183 showV128(&block[4]); printf(" "); \
184 showV128(&block[5]); printf(" fpscr=%08x\n", fpscr); \
188 // ======================== CRYPTO ========================
/* One generated test function per instruction.  Each entry gives the
   test name (the function becomes test_<name>), the instruction text,
   and the numbers of the Q registers it uses, listed in the same order
   as they appear in the instruction text. */
190 GEN_TWOVEC_TEST(aesd_q_q
, "aesd.8 q3, q4", 3, 4)
191 GEN_TWOVEC_TEST(aese_q_q
, "aese.8 q12, q13", 12, 13)
192 GEN_TWOVEC_TEST(aesimc_q_q
, "aesimc.8 q15, q0", 15, 0)
193 GEN_TWOVEC_TEST(aesmc_q_q
, "aesmc.8 q1, q9", 1, 9)
195 GEN_THREEVEC_TEST(sha1c_q_q_q
, "sha1c.32 q11, q10, q2", 11, 10, 2)
196 GEN_TWOVEC_TEST(sha1h_q_q
, "sha1h.32 q6, q7", 6, 7)
197 GEN_THREEVEC_TEST(sha1m_q_q_q
, "sha1m.32 q2, q8, q13", 2, 8, 13)
198 GEN_THREEVEC_TEST(sha1p_q_q_q
, "sha1p.32 q3, q9, q14", 3, 9, 14)
199 GEN_THREEVEC_TEST(sha1su0_q_q_q
, "sha1su0.32 q4, q10, q15", 4, 10, 15)
200 GEN_TWOVEC_TEST(sha1su1_q_q
, "sha1su1.32 q11, q2", 11, 2)
202 GEN_THREEVEC_TEST(sha256h2_q_q_q
, "sha256h2.32 q9, q8, q7", 9, 8, 7)
203 GEN_THREEVEC_TEST(sha256h_q_q_q
, "sha256h.32 q10, q9, q8", 10, 9, 8)
204 GEN_TWOVEC_TEST(sha256su0_q_q
, "sha256su0.32 q11, q10", 11, 10)
205 GEN_THREEVEC_TEST(sha256su1_q_q_q
, "sha256su1.32 q12, q11, q10", 12, 11, 10)
207 // This is a bit complex. This really mentions three registers, so it
208 // should really be a THREEVEC variant. But the two source registers
209 // are D registers. So we say it is just a TWOVEC insn, producing a Q
210 // and taking a single Q (q7); q7 is the d14-d15 register pair, which
211 // is why the insn itself mentions d14 and d15 whereas the
212 // numbers that follow mention q7. The result (q13) is 128 bits wide and
213 // so is unaffected by these shenanigans.
214 GEN_TWOVEC_TEST(pmull_q_d_d
, "vmull.p64 q13, d14, d15", 13, 7)
218 // ======================== CRYPTO ========================
220 // aesd.8 q_q (aes single round decryption)
221 // aese.8 q_q (aes single round encryption)
222 // aesimc.8 q_q (aes inverse mix columns)
223 // aesmc.8 q_q (aes mix columns)
224 if (1) DO50( test_aesd_q_q(TyNONE
) );
225 if (1) DO50( test_aese_q_q(TyNONE
) );
226 if (1) DO50( test_aesimc_q_q(TyNONE
) );
227 if (1) DO50( test_aesmc_q_q(TyNONE
) );
235 if (1) DO50( test_sha1c_q_q_q(TyNONE
) );
236 if (1) DO50( test_sha1h_q_q(TyNONE
) );
237 if (1) DO50( test_sha1m_q_q_q(TyNONE
) );
238 if (1) DO50( test_sha1p_q_q_q(TyNONE
) );
239 if (1) DO50( test_sha1su0_q_q_q(TyNONE
) );
240 if (1) DO50( test_sha1su1_q_q(TyNONE
) );
245 // sha256su1.32 q_q_q
246 if (1) DO50( test_sha256h2_q_q_q(TyNONE
) );
247 if (1) DO50( test_sha256h_q_q_q(TyNONE
) );
248 if (1) DO50( test_sha256su0_q_q(TyNONE
) );
249 if (1) DO50( test_sha256su1_q_q_q(TyNONE
) );
252 if (1) DO50( test_pmull_q_d_d(TyD
) );