target/ppc: Use TCG_CALL_NO_RWG_SE in fsel helper
[qemu.git] / target / arm / crypto_helper.c
blobd28690321f0b86ea1e481cddc8be6c3ab852e508
/*
 * crypto_helper.c - emulate v8 Crypto Extensions instructions
 *
 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */
12 #include "qemu/osdep.h"
14 #include "cpu.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/aes.h"
18 #include "crypto/sm4.h"
19 #include "vec_internal.h"
/*
 * 128-bit working state that can be viewed as sixteen bytes,
 * four 32-bit words or two 64-bit halves.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];
    uint32_t   words[4];
    uint64_t   l[2];
};
/*
 * Element accessors for a CRYPTO_STATE-like object.
 *
 * On big-endian hosts the index is mirrored within each 64-bit half
 * (bytes 0..7 <-> 7..0, words 0,1 <-> 1,0), so that element i refers
 * to the same part of the l[] values as it does on a little-endian host.
 */
#if HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif
/*
 * The caller has not been converted to full gvec, and so only
 * modifies the low 16 bytes of the vector register.
 *
 * Asserts that the operation size encoded in @desc is exactly 16 and
 * then hands the range [opr_sz, max_sz) of @vd to clear_tail().
 * NOTE(review): clear_tail() is declared outside this file (presumably
 * vec_internal.h); it is assumed to zero that range -- confirm.
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    int opr_sz = simd_oprsz(desc);
    int max_sz = simd_maxsz(desc);

    assert(opr_sz == 16);
    clear_tail(vd, opr_sz, max_sz);
}
48 static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
49 uint64_t *rm, bool decrypt)
51 static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
52 static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
53 union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
54 union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
55 int i;
57 /* xor state vector with round key */
58 rk.l[0] ^= st.l[0];
59 rk.l[1] ^= st.l[1];
61 /* combine ShiftRows operation and sbox substitution */
62 for (i = 0; i < 16; i++) {
63 CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
66 rd[0] = st.l[0];
67 rd[1] = st.l[1];
70 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
72 intptr_t i, opr_sz = simd_oprsz(desc);
73 bool decrypt = simd_data(desc);
75 for (i = 0; i < opr_sz; i += 16) {
76 do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
78 clear_tail(vd, opr_sz, simd_maxsz(desc));
81 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
83 static uint32_t const mc[][256] = { {
84 /* MixColumns lookup table */
85 0x00000000, 0x03010102, 0x06020204, 0x05030306,
86 0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
87 0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
88 0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
89 0x30101020, 0x33111122, 0x36121224, 0x35131326,
90 0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
91 0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
92 0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
93 0x60202040, 0x63212142, 0x66222244, 0x65232346,
94 0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
95 0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
96 0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
97 0x50303060, 0x53313162, 0x56323264, 0x55333366,
98 0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
99 0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
100 0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
101 0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
102 0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
103 0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
104 0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
105 0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
106 0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
107 0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
108 0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
109 0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
110 0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
111 0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
112 0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
113 0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
114 0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
115 0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
116 0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
117 0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
118 0x97848413, 0x94858511, 0x91868617, 0x92878715,
119 0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
120 0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
121 0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
122 0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
123 0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
124 0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
125 0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
126 0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
127 0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
128 0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
129 0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
130 0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
131 0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
132 0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
133 0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
134 0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
135 0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
136 0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
137 0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
138 0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
139 0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
140 0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
141 0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
142 0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
143 0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
144 0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
145 0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
146 0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
147 0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
148 0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
149 }, {
150 /* Inverse MixColumns lookup table */
151 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
152 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
153 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
154 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
155 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
156 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
157 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
158 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
159 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
160 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
161 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
162 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
163 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
164 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
165 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
166 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
167 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
168 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
169 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
170 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
171 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
172 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
173 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
174 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
175 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
176 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
177 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
178 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
179 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
180 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
181 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
182 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
183 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
184 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
185 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
186 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
187 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
188 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
189 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
190 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
191 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
192 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
193 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
194 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
195 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
196 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
197 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
198 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
199 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
200 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
201 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
202 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
203 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
204 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
205 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
206 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
207 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
208 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
209 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
210 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
211 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
212 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
213 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
214 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
215 } };
217 union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
218 int i;
220 for (i = 0; i < 16; i += 4) {
221 CR_ST_WORD(st, i >> 2) =
222 mc[decrypt][CR_ST_BYTE(st, i)] ^
223 rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
224 rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
225 rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
228 rd[0] = st.l[0];
229 rd[1] = st.l[1];
232 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
234 intptr_t i, opr_sz = simd_oprsz(desc);
235 bool decrypt = simd_data(desc);
237 for (i = 0; i < opr_sz; i += 16) {
238 do_crypto_aesmc(vd + i, vm + i, decrypt);
240 clear_tail(vd, opr_sz, simd_maxsz(desc));
/*
 * SHA-1 logical functions
 */

/* Bitwise select: each result bit is taken from y where x is 1, else z. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & (y ^ z)) ^ z;
}
/* Bitwise parity: XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    return x ^ y ^ z;
}
/* Bitwise majority: a result bit is 1 when at least two inputs have it set. */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & y) | ((x | y) & z);
}
262 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
264 uint64_t *d = vd, *n = vn, *m = vm;
265 uint64_t d0, d1;
267 d0 = d[1] ^ d[0] ^ m[0];
268 d1 = n[0] ^ d[1] ^ m[1];
269 d[0] = d0;
270 d[1] = d1;
272 clear_tail_16(vd, desc);
275 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
276 uint64_t *rm, uint32_t desc,
277 uint32_t (*fn)(union CRYPTO_STATE *d))
279 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
280 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
281 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
282 int i;
284 for (i = 0; i < 4; i++) {
285 uint32_t t = fn(&d);
287 t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
288 + CR_ST_WORD(m, i);
290 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
291 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
292 CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
293 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
294 CR_ST_WORD(d, 0) = t;
296 rd[0] = d.l[0];
297 rd[1] = d.l[1];
299 clear_tail_16(rd, desc);
302 static uint32_t do_sha1c(union CRYPTO_STATE *d)
304 return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
307 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
309 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
312 static uint32_t do_sha1p(union CRYPTO_STATE *d)
314 return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
317 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
319 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
322 static uint32_t do_sha1m(union CRYPTO_STATE *d)
324 return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
327 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
329 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
332 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
334 uint64_t *rd = vd;
335 uint64_t *rm = vm;
336 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
338 CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
339 CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
341 rd[0] = m.l[0];
342 rd[1] = m.l[1];
344 clear_tail_16(vd, desc);
347 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
349 uint64_t *rd = vd;
350 uint64_t *rm = vm;
351 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
352 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
354 CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
355 CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
356 CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
357 CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
359 rd[0] = d.l[0];
360 rd[1] = d.l[1];
362 clear_tail_16(vd, desc);
/*
 * The SHA-256 logical functions, according to
 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 */

/* XOR of x rotated right by 2, 13 and 22 bits. */
static uint32_t S0(uint32_t x)
{
    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
}
/* XOR of x rotated right by 6, 11 and 25 bits. */
static uint32_t S1(uint32_t x)
{
    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
}
/* XOR of x rotated right by 7 and 18 bits and x shifted right by 3. */
static uint32_t s0(uint32_t x)
{
    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}
/* XOR of x rotated right by 17 and 19 bits and x shifted right by 10. */
static uint32_t s1(uint32_t x)
{
    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}
390 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
392 uint64_t *rd = vd;
393 uint64_t *rn = vn;
394 uint64_t *rm = vm;
395 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
396 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
397 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
398 int i;
400 for (i = 0; i < 4; i++) {
401 uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
402 + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
403 + CR_ST_WORD(m, i);
405 CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
406 CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
407 CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
408 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
410 t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
411 + S0(CR_ST_WORD(d, 0));
413 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
414 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
415 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
416 CR_ST_WORD(d, 0) = t;
419 rd[0] = d.l[0];
420 rd[1] = d.l[1];
422 clear_tail_16(vd, desc);
425 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
427 uint64_t *rd = vd;
428 uint64_t *rn = vn;
429 uint64_t *rm = vm;
430 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
431 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
432 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
433 int i;
435 for (i = 0; i < 4; i++) {
436 uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
437 + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
438 + CR_ST_WORD(m, i);
440 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
441 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
442 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
443 CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
446 rd[0] = d.l[0];
447 rd[1] = d.l[1];
449 clear_tail_16(vd, desc);
452 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
454 uint64_t *rd = vd;
455 uint64_t *rm = vm;
456 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
457 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
459 CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
460 CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
461 CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
462 CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
464 rd[0] = d.l[0];
465 rd[1] = d.l[1];
467 clear_tail_16(vd, desc);
470 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
472 uint64_t *rd = vd;
473 uint64_t *rn = vn;
474 uint64_t *rm = vm;
475 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
476 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
477 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
479 CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
480 CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
481 CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
482 CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
484 rd[0] = d.l[0];
485 rd[1] = d.l[1];
487 clear_tail_16(vd, desc);
/*
 * The SHA-512 logical functions (same as above but using 64-bit operands)
 */

/* Bitwise select: each result bit is taken from y where x is 1, else z. */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & (y ^ z)) ^ z;
}
/* Bitwise majority: a result bit is 1 when at least two inputs have it set. */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & y) | ((x | y) & z);
}
/* XOR of x rotated right by 28, 34 and 39 bits. */
static uint64_t S0_512(uint64_t x)
{
    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
}
/* XOR of x rotated right by 14, 18 and 41 bits. */
static uint64_t S1_512(uint64_t x)
{
    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
}
/* XOR of x rotated right by 1 and 8 bits and x shifted right by 7. */
static uint64_t s0_512(uint64_t x)
{
    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
}
/* XOR of x rotated right by 19 and 61 bits and x shifted right by 6. */
static uint64_t s1_512(uint64_t x)
{
    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
}
524 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
526 uint64_t *rd = vd;
527 uint64_t *rn = vn;
528 uint64_t *rm = vm;
529 uint64_t d0 = rd[0];
530 uint64_t d1 = rd[1];
532 d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
533 d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
535 rd[0] = d0;
536 rd[1] = d1;
538 clear_tail_16(vd, desc);
541 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
543 uint64_t *rd = vd;
544 uint64_t *rn = vn;
545 uint64_t *rm = vm;
546 uint64_t d0 = rd[0];
547 uint64_t d1 = rd[1];
549 d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
550 d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
552 rd[0] = d0;
553 rd[1] = d1;
555 clear_tail_16(vd, desc);
558 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
560 uint64_t *rd = vd;
561 uint64_t *rn = vn;
562 uint64_t d0 = rd[0];
563 uint64_t d1 = rd[1];
565 d0 += s0_512(rd[1]);
566 d1 += s0_512(rn[0]);
568 rd[0] = d0;
569 rd[1] = d1;
571 clear_tail_16(vd, desc);
574 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
576 uint64_t *rd = vd;
577 uint64_t *rn = vn;
578 uint64_t *rm = vm;
580 rd[0] += s1_512(rn[0]) + rm[0];
581 rd[1] += s1_512(rn[1]) + rm[1];
583 clear_tail_16(vd, desc);
586 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
588 uint64_t *rd = vd;
589 uint64_t *rn = vn;
590 uint64_t *rm = vm;
591 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
592 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
593 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
594 uint32_t t;
596 t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
597 CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
599 t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
600 CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
602 t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
603 CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
605 t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
606 CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
608 rd[0] = d.l[0];
609 rd[1] = d.l[1];
611 clear_tail_16(vd, desc);
614 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
616 uint64_t *rd = vd;
617 uint64_t *rn = vn;
618 uint64_t *rm = vm;
619 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
620 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
621 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
622 uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
624 CR_ST_WORD(d, 0) ^= t;
625 CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
626 CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
627 CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
628 ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
630 rd[0] = d.l[0];
631 rd[1] = d.l[1];
633 clear_tail_16(vd, desc);
636 static inline void QEMU_ALWAYS_INLINE
637 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
638 uint32_t desc, uint32_t opcode)
640 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
641 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
642 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
643 uint32_t imm2 = simd_data(desc);
644 uint32_t t;
646 assert(imm2 < 4);
648 if (opcode == 0 || opcode == 2) {
649 /* SM3TT1A, SM3TT2A */
650 t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
651 } else if (opcode == 1) {
652 /* SM3TT1B */
653 t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
654 } else if (opcode == 3) {
655 /* SM3TT2B */
656 t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
657 } else {
658 qemu_build_not_reached();
661 t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
663 CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
665 if (opcode < 2) {
666 /* SM3TT1A, SM3TT1B */
667 t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
669 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
670 } else {
671 /* SM3TT2A, SM3TT2B */
672 t += CR_ST_WORD(n, 3);
673 t ^= rol32(t, 9) ^ rol32(t, 17);
675 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
678 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
679 CR_ST_WORD(d, 3) = t;
681 rd[0] = d.l[0];
682 rd[1] = d.l[1];
684 clear_tail_16(rd, desc);
687 #define DO_SM3TT(NAME, OPCODE) \
688 void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
689 { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
691 DO_SM3TT(crypto_sm3tt1a, 0)
692 DO_SM3TT(crypto_sm3tt1b, 1)
693 DO_SM3TT(crypto_sm3tt2a, 2)
694 DO_SM3TT(crypto_sm3tt2b, 3)
696 #undef DO_SM3TT
698 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
700 union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
701 union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
702 uint32_t t, i;
704 for (i = 0; i < 4; i++) {
705 t = CR_ST_WORD(d, (i + 1) % 4) ^
706 CR_ST_WORD(d, (i + 2) % 4) ^
707 CR_ST_WORD(d, (i + 3) % 4) ^
708 CR_ST_WORD(n, i);
710 t = sm4_sbox[t & 0xff] |
711 sm4_sbox[(t >> 8) & 0xff] << 8 |
712 sm4_sbox[(t >> 16) & 0xff] << 16 |
713 sm4_sbox[(t >> 24) & 0xff] << 24;
715 CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
716 rol32(t, 24);
719 rd[0] = d.l[0];
720 rd[1] = d.l[1];
723 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
725 intptr_t i, opr_sz = simd_oprsz(desc);
727 for (i = 0; i < opr_sz; i += 16) {
728 do_crypto_sm4e(vd + i, vn + i, vm + i);
730 clear_tail(vd, opr_sz, simd_maxsz(desc));
733 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
735 union CRYPTO_STATE d;
736 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
737 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
738 uint32_t t, i;
740 d = n;
741 for (i = 0; i < 4; i++) {
742 t = CR_ST_WORD(d, (i + 1) % 4) ^
743 CR_ST_WORD(d, (i + 2) % 4) ^
744 CR_ST_WORD(d, (i + 3) % 4) ^
745 CR_ST_WORD(m, i);
747 t = sm4_sbox[t & 0xff] |
748 sm4_sbox[(t >> 8) & 0xff] << 8 |
749 sm4_sbox[(t >> 16) & 0xff] << 16 |
750 sm4_sbox[(t >> 24) & 0xff] << 24;
752 CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
755 rd[0] = d.l[0];
756 rd[1] = d.l[1];
759 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
761 intptr_t i, opr_sz = simd_oprsz(desc);
763 for (i = 0; i < opr_sz; i += 16) {
764 do_crypto_sm4ekey(vd + i, vn + i, vm + i);
766 clear_tail(vd, opr_sz, simd_maxsz(desc));
769 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
771 intptr_t i, opr_sz = simd_oprsz(desc);
772 uint64_t *d = vd, *n = vn, *m = vm;
774 for (i = 0; i < opr_sz / 8; ++i) {
775 d[i] = n[i] ^ rol64(m[i], 1);
777 clear_tail(vd, opr_sz, simd_maxsz(desc));