libsodium: Needed for dnscrypt-proxy release 1.3.0
[tomato.git] / release / src / router / libsodium / src / libsodium / crypto_stream / aes128ctr / portable / afternm_aes128ctr.c
blob4a3785eb17559131265a84261cb8eb4abe8458cb
1 /* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
2 * Date: 2009-03-19
3 * Public domain */
5 #include "api.h"
6 #include "int128.h"
7 #include "common.h"
8 #include "consts.h"
/*
 * crypto_stream_afternm: write `len` bytes of keystream to `outp`.
 *
 * Keystream is produced in chunks of 128 bytes (eight 16-byte values per pass
 * through the enc_block label). Each pass derives eight blocks from the current
 * nonce/counter value, runs them through nine aesround steps plus lastround,
 * and stores the result.
 *
 *   outp   - destination buffer; `len` bytes are written and the pointer is
 *            advanced locally as output is produced.
 *   len    - number of keystream bytes requested.
 *   noncep - nonce/initial counter block; one int128 is read from it
 *            (presumably 16 bytes - confirm against int128.h).
 *   c      - passed unchanged to every aesround/lastround invocation;
 *            presumably the pre-expanded (bitsliced) round keys - TODO confirm
 *            against the key-setup code.
 *
 * Always returns 0.
 *
 * NOTE(review): bitslice, aesround and lastround are invoked without a trailing
 * semicolon, so they are presumably statement macros from one of the included
 * headers; their exact semantics are not visible here.
 * NOTE(review): the lines holding the braces that delimit this function body
 * are not present in this listing.
 */
10 int crypto_stream_afternm(unsigned char *outp, unsigned long long len, const unsigned char *noncep, const unsigned char *c)
/* Two banks of eight 128-bit working values. The round macros below take one
 * bank as their first eight operands and the other as the next eight, and the
 * banks swap roles on every round. */
13 int128 xmm0;
14 int128 xmm1;
15 int128 xmm2;
16 int128 xmm3;
17 int128 xmm4;
18 int128 xmm5;
19 int128 xmm6;
20 int128 xmm7;
22 int128 xmm8;
23 int128 xmm9;
24 int128 xmm10;
25 int128 xmm11;
26 int128 xmm12;
27 int128 xmm13;
28 int128 xmm14;
29 int128 xmm15;
/* Local copy of the nonce; its bytes 12..15 are updated as a counter below. */
31 int128 nonce_stack;
/* Byte count remaining in the final, shorter-than-128-byte chunk. */
32 unsigned long long lensav;
/* Staging buffer for one 128-byte chunk when only part of it is emitted. */
33 unsigned char bl[128];
34 unsigned char *blp;
35 unsigned char b;
37 uint32 tmp;
39 /* Copy nonce on the stack */
/* Working on the copy means the counter updates never touch the caller's
 * nonce buffer. */
40 copy2(&nonce_stack, (int128 *) (noncep + 0));
41 unsigned char *np = (unsigned char *)&nonce_stack;
/* ---- Start of one 128-byte pass; re-entered via goto while len > 128. ---- */
43 enc_block:
/* xmm0 holds the current nonce/counter block as stored. xmm1..xmm7 start as
 * SWAP32-shuffled copies of it and then receive the offsets 1..7 through
 * add_uint32_big (presumably an increment of the big-endian 32-bit counter
 * word - confirm in int128.h). */
45 xmm0 = *(int128 *) (np + 0);
46 copy2(&xmm1, &xmm0);
47 shufb(&xmm1, SWAP32);
48 copy2(&xmm2, &xmm1);
49 copy2(&xmm3, &xmm1);
50 copy2(&xmm4, &xmm1);
51 copy2(&xmm5, &xmm1);
52 copy2(&xmm6, &xmm1);
53 copy2(&xmm7, &xmm1);
55 add_uint32_big(&xmm1, 1);
56 add_uint32_big(&xmm2, 2);
57 add_uint32_big(&xmm3, 3);
58 add_uint32_big(&xmm4, 4);
59 add_uint32_big(&xmm5, 5);
60 add_uint32_big(&xmm6, 6);
61 add_uint32_big(&xmm7, 7);
/* xmm0 was not SWAP32-shuffled above, so it uses the M0 mask while xmm1..xmm7
 * use M0SWAP (presumably M0 combined with undoing SWAP32 - see consts.h). */
63 shufb(&xmm0, M0);
64 shufb(&xmm1, M0SWAP);
65 shufb(&xmm2, M0SWAP);
66 shufb(&xmm3, M0SWAP);
67 shufb(&xmm4, M0SWAP);
68 shufb(&xmm5, M0SWAP);
69 shufb(&xmm6, M0SWAP);
70 shufb(&xmm7, M0SWAP);
/* Convert the eight values to bitsliced form; xmm8 is passed as the last
 * operand (presumably scratch - confirm in common.h). */
72 bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
/* Rounds 1..9 followed by lastround. Odd-numbered rounds list xmm0..7 first and
 * xmm8..15 second; even-numbered rounds and lastround list them the other way
 * round. */
74 aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
75 aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
76 aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
77 aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
78 aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
79 aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
80 aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
81 aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
82 aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
83 lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
/* Second bitslice call, this time on the xmm8..15 bank in a permuted order
 * (presumably converting back to byte form). The stores below read the results
 * in the order xmm8, xmm9, xmm12, xmm14, xmm11, xmm15, xmm10, xmm13. */
85 bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
/* Dispatch on the remaining length. Note that len == 0 also takes the partial
 * path: a chunk has already been computed, but no bytes are copied out. */
87 if(len < 128) goto partial;
88 if(len == 128) goto full;
/* More than 128 bytes remain: add 8 to the 32-bit value held at bytes 12..15
 * of the local nonce copy (accessed through the *_bigendian helpers). */
90 tmp = load32_bigendian(np + 12);
91 tmp += 8;
92 store32_bigendian(np + 12, tmp);
/* NOTE(review): these stores go through an int128 pointer cast of outp, so they
 * presumably rely on outp being suitably aligned for int128 - confirm. */
94 *(int128 *) (outp + 0) = xmm8;
95 *(int128 *) (outp + 16) = xmm9;
96 *(int128 *) (outp + 32) = xmm12;
97 *(int128 *) (outp + 48) = xmm14;
98 *(int128 *) (outp + 64) = xmm11;
99 *(int128 *) (outp + 80) = xmm15;
100 *(int128 *) (outp + 96) = xmm10;
101 *(int128 *) (outp + 112) = xmm13;
103 len -= 128;
104 outp += 128;
106 goto enc_block;
/* ---- Fewer than 128 bytes remain. ---- */
108 partial:
/* Keep the exact byte count in lensav; len becomes the number of whole 16-byte
 * units (len / 16) and is added to the counter. */
110 lensav = len;
111 len >>= 4;
/* The updated counter is not read again before the function returns. */
113 tmp = load32_bigendian(np + 12);
114 tmp += len;
115 store32_bigendian(np + 12, tmp);
/* Stage the full 128-byte chunk in bl so that only lensav bytes reach outp. */
117 blp = bl;
118 *(int128 *)(blp + 0) = xmm8;
119 *(int128 *)(blp + 16) = xmm9;
120 *(int128 *)(blp + 32) = xmm12;
121 *(int128 *)(blp + 48) = xmm14;
122 *(int128 *)(blp + 64) = xmm11;
123 *(int128 *)(blp + 80) = xmm15;
124 *(int128 *)(blp + 96) = xmm10;
125 *(int128 *)(blp + 112) = xmm13;
/* Byte-by-byte copy of the first lensav bytes of bl to outp. */
127 bytes:
129 if(lensav == 0) goto end;
131 b = blp[0];
132 *(unsigned char *)(outp + 0) = b;
134 blp += 1;
135 outp +=1;
136 lensav -= 1;
138 goto bytes;
/* ---- Exactly 128 bytes remain: same counter update and stores as the loop
 * path, then fall through to end instead of looping. ---- */
140 full:
/* The updated counter is not read again before the function returns. */
142 tmp = load32_bigendian(np + 12);
143 tmp += 8;
144 store32_bigendian(np + 12, tmp);
146 *(int128 *) (outp + 0) = xmm8;
147 *(int128 *) (outp + 16) = xmm9;
148 *(int128 *) (outp + 32) = xmm12;
149 *(int128 *) (outp + 48) = xmm14;
150 *(int128 *) (outp + 64) = xmm11;
151 *(int128 *) (outp + 80) = xmm15;
152 *(int128 *) (outp + 96) = xmm10;
153 *(int128 *) (outp + 112) = xmm13;
/* Single exit point; no failure path exists in this function. */
155 end:
156 return 0;