1 /* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
11 int crypto_stream_xor_afternm(unsigned char *outp
, const unsigned char *inp
, unsigned long long len
, const unsigned char *noncep
, const unsigned char *c
)
33 unsigned long long lensav
;
34 unsigned char bl
[128];
40 /* Copy nonce on the stack */
41 copy2(&nonce_stack
, (int128
*) (noncep
+ 0));
42 unsigned char *np
= (unsigned char *)&nonce_stack
;
46 xmm0
= *(int128
*) (np
+ 0);
56 add_uint32_big(&xmm1
, 1);
57 add_uint32_big(&xmm2
, 2);
58 add_uint32_big(&xmm3
, 3);
59 add_uint32_big(&xmm4
, 4);
60 add_uint32_big(&xmm5
, 5);
61 add_uint32_big(&xmm6
, 6);
62 add_uint32_big(&xmm7
, 7);
73 bitslice(xmm7
, xmm6
, xmm5
, xmm4
, xmm3
, xmm2
, xmm1
, xmm0
, xmm8
)
75 aesround( 1, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
, xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
,c
)
76 aesround( 2, xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
,c
)
77 aesround( 3, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
, xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
,c
)
78 aesround( 4, xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
,c
)
79 aesround( 5, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
, xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
,c
)
80 aesround( 6, xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
,c
)
81 aesround( 7, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
, xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
,c
)
82 aesround( 8, xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
,c
)
83 aesround( 9, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
, xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
,c
)
84 lastround(xmm8
, xmm9
, xmm10
, xmm11
, xmm12
, xmm13
, xmm14
, xmm15
, xmm0
, xmm1
, xmm2
, xmm3
, xmm4
, xmm5
, xmm6
, xmm7
,c
)
86 bitslice(xmm13
, xmm10
, xmm15
, xmm11
, xmm14
, xmm12
, xmm9
, xmm8
, xmm0
)
88 if(len
< 128) goto partial
;
89 if(len
== 128) goto full
;
91 tmp
= load32_bigendian(np
+ 12);
93 store32_bigendian(np
+ 12, tmp
);
95 xor2(&xmm8
, (int128
*)(inp
+ 0));
96 xor2(&xmm9
, (int128
*)(inp
+ 16));
97 xor2(&xmm12
, (int128
*)(inp
+ 32));
98 xor2(&xmm14
, (int128
*)(inp
+ 48));
99 xor2(&xmm11
, (int128
*)(inp
+ 64));
100 xor2(&xmm15
, (int128
*)(inp
+ 80));
101 xor2(&xmm10
, (int128
*)(inp
+ 96));
102 xor2(&xmm13
, (int128
*)(inp
+ 112));
104 *(int128
*) (outp
+ 0) = xmm8
;
105 *(int128
*) (outp
+ 16) = xmm9
;
106 *(int128
*) (outp
+ 32) = xmm12
;
107 *(int128
*) (outp
+ 48) = xmm14
;
108 *(int128
*) (outp
+ 64) = xmm11
;
109 *(int128
*) (outp
+ 80) = xmm15
;
110 *(int128
*) (outp
+ 96) = xmm10
;
111 *(int128
*) (outp
+ 112) = xmm13
;
124 tmp
= load32_bigendian(np
+ 12);
126 store32_bigendian(np
+ 12, tmp
);
129 *(int128
*)(blp
+ 0) = xmm8
;
130 *(int128
*)(blp
+ 16) = xmm9
;
131 *(int128
*)(blp
+ 32) = xmm12
;
132 *(int128
*)(blp
+ 48) = xmm14
;
133 *(int128
*)(blp
+ 64) = xmm11
;
134 *(int128
*)(blp
+ 80) = xmm15
;
135 *(int128
*)(blp
+ 96) = xmm10
;
136 *(int128
*)(blp
+ 112) = xmm13
;
140 if(lensav
== 0) goto end
;
143 b
^= *(unsigned char *)(inp
+ 0);
144 *(unsigned char *)(outp
+ 0) = b
;
155 tmp
= load32_bigendian(np
+ 12);
157 store32_bigendian(np
+ 12, tmp
);
159 xor2(&xmm8
, (int128
*)(inp
+ 0));
160 xor2(&xmm9
, (int128
*)(inp
+ 16));
161 xor2(&xmm12
, (int128
*)(inp
+ 32));
162 xor2(&xmm14
, (int128
*)(inp
+ 48));
163 xor2(&xmm11
, (int128
*)(inp
+ 64));
164 xor2(&xmm15
, (int128
*)(inp
+ 80));
165 xor2(&xmm10
, (int128
*)(inp
+ 96));
166 xor2(&xmm13
, (int128
*)(inp
+ 112));
168 *(int128
*) (outp
+ 0) = xmm8
;
169 *(int128
*) (outp
+ 16) = xmm9
;
170 *(int128
*) (outp
+ 32) = xmm12
;
171 *(int128
*) (outp
+ 48) = xmm14
;
172 *(int128
*) (outp
+ 64) = xmm11
;
173 *(int128
*) (outp
+ 80) = xmm15
;
174 *(int128
*) (outp
+ 96) = xmm10
;
175 *(int128
*) (outp
+ 112) = xmm13
;