/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper */
/* Includes assumed from the package layout (the int128 type and the
   copy2/bitslice/aesround/load32_bigendian helpers live in these headers). */
#include "crypto_stream.h"
#include "int128.h"
#include "common.h"
#include "consts.h"

int crypto_stream_afternm(unsigned char *outp, unsigned long long len, const unsigned char *noncep, const unsigned char *c)
{
  /* Declarations restored from context: the sixteen bitsliced "registers",
     the stack copy of the nonce, and the scratch variables used below. */
  int128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
  int128 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
  int128 nonce_stack;
  unsigned long long lensav;
  unsigned char bl[128];
  unsigned char *blp;
  unsigned char b;
  uint32 tmp; /* uint32 as typedef'd in the package's common.h (assumption) */
  /* Copy nonce on the stack */
  copy2(&nonce_stack, (int128 *) (noncep + 0));
  unsigned char *np = (unsigned char *)&nonce_stack;
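  /* The final 4 bytes of the 16-byte nonce serve as a 32-bit big-endian
     block counter; working on a stack copy lets the code below update it
     in place between batches. */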
enc_block:

  /* Load the current counter block; the copy2() calls that replicate it
     into xmm1..xmm7 are restored from context. */
  xmm0 = *(int128 *) (np + 0);
  copy2(&xmm1, &xmm0);
  copy2(&xmm2, &xmm0);
  copy2(&xmm3, &xmm0);
  copy2(&xmm4, &xmm0);
  copy2(&xmm5, &xmm0);
  copy2(&xmm6, &xmm0);
  copy2(&xmm7, &xmm0);
  add_uint32_big(&xmm1, 1);
  add_uint32_big(&xmm2, 2);
  add_uint32_big(&xmm3, 3);
  add_uint32_big(&xmm4, 4);
  add_uint32_big(&xmm5, 5);
  add_uint32_big(&xmm6, 6);
  add_uint32_big(&xmm7, 7);
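  /* xmm0..xmm7 now hold 8 consecutive counter blocks: add_uint32_big() adds
     its argument to the big-endian counter word of each copy, so one pass
     below encrypts blocks n, n+1, ..., n+7 in parallel. */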
  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
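  /* Ten AES-128 rounds on the bitsliced state. Round-key material comes
     from the precomputed schedule c; the state ping-pongs between
     xmm0..xmm7 and xmm8..xmm15 from one round to the next. */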
  aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, c)
  aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, c)
  aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, c)
  aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, c)
  aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, c)
  aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, c)
  aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, c)
  aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, c)
  aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, c)
  lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, c)
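  /* Convert back out of bitsliced form. After this inverse transform the
     eight keystream blocks sit in xmm8, xmm9, xmm12, xmm14, xmm11, xmm15,
     xmm10, xmm13 (in that order), which is why the stores below look
     shuffled. */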
  bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
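  /* Three cases: fewer than 128 bytes left (partial batch), exactly 128
     (final full batch), or more (steady state: store and loop). */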
  if(len < 128) goto partial;
  if(len == 128) goto full;
  tmp = load32_bigendian(np + 12);
  tmp += 8; /* restored from context: advance past the 8 blocks just made */
  store32_bigendian(np + 12, tmp);
  *(int128 *) (outp + 0) = xmm8;
  *(int128 *) (outp + 16) = xmm9;
  *(int128 *) (outp + 32) = xmm12;
  *(int128 *) (outp + 48) = xmm14;
  *(int128 *) (outp + 64) = xmm11;
  *(int128 *) (outp + 80) = xmm15;
  *(int128 *) (outp + 96) = xmm10;
  *(int128 *) (outp + 112) = xmm13;
  len -= 128;
  outp += 128;
  goto enc_block;

partial:

  /* Fewer than 128 bytes remain. The loop bookkeeping above and the
     lensav / len >>= 4 prologue here are restored from context: the
     counter advances by the number of whole 16-byte blocks consumed. */
  lensav = len;
  len >>= 4;

  tmp = load32_bigendian(np + 12);
  tmp += len;
  store32_bigendian(np + 12, tmp);
  blp = bl; /* restored from context: stage the keystream in the on-stack buffer */
  *(int128 *)(blp + 0) = xmm8;
  *(int128 *)(blp + 16) = xmm9;
  *(int128 *)(blp + 32) = xmm12;
  *(int128 *)(blp + 48) = xmm14;
  *(int128 *)(blp + 64) = xmm11;
  *(int128 *)(blp + 80) = xmm15;
  *(int128 *)(blp + 96) = xmm10;
  *(int128 *)(blp + 112) = xmm13;
bytes:

  if(lensav == 0) goto end;

  /* Byte-by-byte copy of the tail; loop body restored from context. */
  b = *(unsigned char *)(blp + 0);
  *(unsigned char *)(outp + 0) = b;
  blp += 1;
  outp += 1;
  lensav -= 1;

  goto bytes;
full:

  tmp = load32_bigendian(np + 12);
  tmp += 8; /* restored from context: the final full batch is 8 blocks */
  store32_bigendian(np + 12, tmp);
  *(int128 *) (outp + 0) = xmm8;
  *(int128 *) (outp + 16) = xmm9;
  *(int128 *) (outp + 32) = xmm12;
  *(int128 *) (outp + 48) = xmm14;
  *(int128 *) (outp + 64) = xmm11;
  *(int128 *) (outp + 80) = xmm15;
  *(int128 *) (outp + 96) = xmm10;
  *(int128 *) (outp + 112) = xmm13;

end:
  return 0;
}