libsodium: Needed for Dnscrypto-proxy Release 1.3.0
[tomato.git] release/src/router/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c (blob 6734fb93f5454f964cd8cec9ba9ed2561a5f1270)
/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
 * Date: 2009-03-19
 * Public domain */

#include <stdio.h>
#include "api.h"
#include "int128.h"
#include "common.h"
#include "consts.h"
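/* XOR the input with an AES-128-CTR keystream. The argument c is expected to
   hold the precomputed (bitsliced) round keys produced by the corresponding
   beforenm step; only the nonce/counter block is set up here. */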
int crypto_stream_xor_afternm(unsigned char *outp, const unsigned char *inp, unsigned long long len, const unsigned char *noncep, const unsigned char *c)
{
    int128 xmm0;
    int128 xmm1;
    int128 xmm2;
    int128 xmm3;
    int128 xmm4;
    int128 xmm5;
    int128 xmm6;
    int128 xmm7;

    int128 xmm8;
    int128 xmm9;
    int128 xmm10;
    int128 xmm11;
    int128 xmm12;
    int128 xmm13;
    int128 xmm14;
    int128 xmm15;

    int128 nonce_stack;
    unsigned long long lensav;
    unsigned char bl[128];
    unsigned char *blp;
    unsigned char b;

    uint32 tmp;
    /* Copy nonce on the stack */
    copy2(&nonce_stack, (int128 *) (noncep + 0));
    unsigned char *np = (unsigned char *)&nonce_stack;
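    /* Main loop: each iteration derives eight consecutive counter blocks from
       the nonce, encrypts them, and XORs the resulting 128 bytes of keystream
       into the input. */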
enc_block:

    xmm0 = *(int128 *) (np + 0);
    copy2(&xmm1, &xmm0);
    shufb(&xmm1, SWAP32);
    copy2(&xmm2, &xmm1);
    copy2(&xmm3, &xmm1);
    copy2(&xmm4, &xmm1);
    copy2(&xmm5, &xmm1);
    copy2(&xmm6, &xmm1);
    copy2(&xmm7, &xmm1);

    add_uint32_big(&xmm1, 1);
    add_uint32_big(&xmm2, 2);
    add_uint32_big(&xmm3, 3);
    add_uint32_big(&xmm4, 4);
    add_uint32_big(&xmm5, 5);
    add_uint32_big(&xmm6, 6);
    add_uint32_big(&xmm7, 7);

    shufb(&xmm0, M0);
    shufb(&xmm1, M0SWAP);
    shufb(&xmm2, M0SWAP);
    shufb(&xmm3, M0SWAP);
    shufb(&xmm4, M0SWAP);
    shufb(&xmm5, M0SWAP);
    shufb(&xmm6, M0SWAP);
    shufb(&xmm7, M0SWAP);
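    /* Bitslice the eight counter blocks, run the ten AES-128 rounds against
       the round keys in c, then convert back from bitsliced form. */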
    bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)

    aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
    aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
    aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
    aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
    aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
    aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
    aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
    aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
    aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
    lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)

    bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
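    /* Fewer than 128 bytes left: finish byte by byte. Exactly 128: write the
       last full block and return. Otherwise advance the big-endian counter by
       8 blocks and continue the loop. */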
    if(len < 128) goto partial;
    if(len == 128) goto full;

    tmp = load32_bigendian(np + 12);
    tmp += 8;
    store32_bigendian(np + 12, tmp);

    xor2(&xmm8, (int128 *)(inp + 0));
    xor2(&xmm9, (int128 *)(inp + 16));
    xor2(&xmm12, (int128 *)(inp + 32));
    xor2(&xmm14, (int128 *)(inp + 48));
    xor2(&xmm11, (int128 *)(inp + 64));
    xor2(&xmm15, (int128 *)(inp + 80));
    xor2(&xmm10, (int128 *)(inp + 96));
    xor2(&xmm13, (int128 *)(inp + 112));

    *(int128 *) (outp + 0) = xmm8;
    *(int128 *) (outp + 16) = xmm9;
    *(int128 *) (outp + 32) = xmm12;
    *(int128 *) (outp + 48) = xmm14;
    *(int128 *) (outp + 64) = xmm11;
    *(int128 *) (outp + 80) = xmm15;
    *(int128 *) (outp + 96) = xmm10;
    *(int128 *) (outp + 112) = xmm13;

    len -= 128;
    inp += 128;
    outp += 128;

    goto enc_block;
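/* Final partial block: advance the counter by the number of whole 16-byte
   blocks consumed, park the keystream in the bl buffer and XOR the remaining
   bytes one at a time. */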
partial:

    lensav = len;
    len >>= 4;

    tmp = load32_bigendian(np + 12);
    tmp += len;
    store32_bigendian(np + 12, tmp);

    blp = bl;
    *(int128 *)(blp + 0) = xmm8;
    *(int128 *)(blp + 16) = xmm9;
    *(int128 *)(blp + 32) = xmm12;
    *(int128 *)(blp + 48) = xmm14;
    *(int128 *)(blp + 64) = xmm11;
    *(int128 *)(blp + 80) = xmm15;
    *(int128 *)(blp + 96) = xmm10;
    *(int128 *)(blp + 112) = xmm13;
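/* Byte-wise tail: XOR the leftover input bytes against the buffered keystream. */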
bytes:

    if(lensav == 0) goto end;

    b = blp[0];
    b ^= *(unsigned char *)(inp + 0);
    *(unsigned char *)(outp + 0) = b;

    blp += 1;
    inp += 1;
    outp += 1;
    lensav -= 1;

    goto bytes;
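/* Exactly 128 bytes left: same as the main loop body, but without looping back. */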
full:

    tmp = load32_bigendian(np + 12);
    tmp += 8;
    store32_bigendian(np + 12, tmp);

    xor2(&xmm8, (int128 *)(inp + 0));
    xor2(&xmm9, (int128 *)(inp + 16));
    xor2(&xmm12, (int128 *)(inp + 32));
    xor2(&xmm14, (int128 *)(inp + 48));
    xor2(&xmm11, (int128 *)(inp + 64));
    xor2(&xmm15, (int128 *)(inp + 80));
    xor2(&xmm10, (int128 *)(inp + 96));
    xor2(&xmm13, (int128 *)(inp + 112));

    *(int128 *) (outp + 0) = xmm8;
    *(int128 *) (outp + 16) = xmm9;
    *(int128 *) (outp + 32) = xmm12;
    *(int128 *) (outp + 48) = xmm14;
    *(int128 *) (outp + 64) = xmm11;
    *(int128 *) (outp + 80) = xmm15;
    *(int128 *) (outp + 96) = xmm10;
    *(int128 *) (outp + 112) = xmm13;
end:

    return 0;
}