2 * ChaCha20-Poly1305 Implementation for SSH-2
5 * http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/ssh/PROTOCOL.chacha20poly1305?rev=1.2&content-type=text/x-cvsweb-markup
8 * http://cr.yp.to/chacha/chacha-20080128.pdf
11 * http://cr.yp.to/snuffle/spec.pdf
14 * http://cr.yp.to/mac/poly1305-20050329.pdf
16 * The nonce for the Poly1305 is the second part of the key output
17 * from the first round of ChaCha20. This removes the AES requirement.
18 * This is undocumented!
20 * This has an intricate link between the cipher and the MAC. The
21 * keying of both is done by the cipher and setting of the IV is
22 * done by the MAC. One cannot operate without the other. The
23 * configuration of the ssh2_cipher structure ensures that the MAC is
24 * set (and others ignored) if this cipher is chosen.
26 * This cipher also encrypts the length using a different
27 * instantiation of the cipher using a different key and IV made from
28 * the sequence number which is passed in addition when calling
29 * encrypt/decrypt on it.
39 /* ChaCha20 implementation, only supporting 256-bit keys */
41 /* State for each ChaCha20 instance */
43 /* Current context, usually with the count incremented
44 * 0-3 are the static constant
46 * 12-13 are the counter
49 /* The output of the state above ready to xor */
50 unsigned char current
[64];
51 /* The index of the above currently used to allow a true streaming cipher */
55 static INLINE
void chacha20_round(struct chacha20
*ctx
)
61 memcpy(copy
, ctx
->state
, sizeof(copy
));
63 /* A circular rotation for a 32bit number */
64 #define rotl(x, shift) x = ((x << shift) | (x >> (32 - shift)))
66 /* What to do for each quarter round operation */
67 #define qrop(a, b, c, d) \
73 #define quarter(a, b, c, d) \
79 /* Do 20 rounds, in pairs because every other is different */
80 for (i
= 0; i
< 20; i
+= 2) {
84 quarter(2, 6, 10, 14);
85 quarter(3, 7, 11, 15);
86 /* Another slightly different round */
87 quarter(0, 5, 10, 15);
88 quarter(1, 6, 11, 12);
93 /* Dump the macros, don't need them littering */
98 /* Add the initial state */
99 for (i
= 0; i
< 16; ++i
) {
100 copy
[i
] += ctx
->state
[i
];
103 /* Update the content of the xor buffer */
104 for (i
= 0; i
< 16; ++i
) {
105 ctx
->current
[i
* 4 + 0] = copy
[i
] >> 0;
106 ctx
->current
[i
* 4 + 1] = copy
[i
] >> 8;
107 ctx
->current
[i
* 4 + 2] = copy
[i
] >> 16;
108 ctx
->current
[i
* 4 + 3] = copy
[i
] >> 24;
110 /* State full, reset pointer to beginning */
111 ctx
->currentIndex
= 0;
112 smemclr(copy
, sizeof(copy
));
114 /* Increment round counter */
116 /* Check for overflow, not done in one line so the 32 bits are chopped by the type */
117 if (!(uint32
)(ctx
->state
[12])) {
122 /* Initialise context with 256bit key */
123 static void chacha20_key(struct chacha20
*ctx
, const unsigned char *key
)
125 static const char constant
[16] = "expand 32-byte k";
127 /* Add the fixed string to the start of the state */
128 ctx
->state
[0] = GET_32BIT_LSB_FIRST(constant
+ 0);
129 ctx
->state
[1] = GET_32BIT_LSB_FIRST(constant
+ 4);
130 ctx
->state
[2] = GET_32BIT_LSB_FIRST(constant
+ 8);
131 ctx
->state
[3] = GET_32BIT_LSB_FIRST(constant
+ 12);
134 ctx
->state
[4] = GET_32BIT_LSB_FIRST(key
+ 0);
135 ctx
->state
[5] = GET_32BIT_LSB_FIRST(key
+ 4);
136 ctx
->state
[6] = GET_32BIT_LSB_FIRST(key
+ 8);
137 ctx
->state
[7] = GET_32BIT_LSB_FIRST(key
+ 12);
138 ctx
->state
[8] = GET_32BIT_LSB_FIRST(key
+ 16);
139 ctx
->state
[9] = GET_32BIT_LSB_FIRST(key
+ 20);
140 ctx
->state
[10] = GET_32BIT_LSB_FIRST(key
+ 24);
141 ctx
->state
[11] = GET_32BIT_LSB_FIRST(key
+ 28);
143 /* New key, dump context */
144 ctx
->currentIndex
= 64;
147 static void chacha20_iv(struct chacha20
*ctx
, const unsigned char *iv
)
151 ctx
->state
[14] = GET_32BIT_MSB_FIRST(iv
);
152 ctx
->state
[15] = GET_32BIT_MSB_FIRST(iv
+ 4);
154 /* New IV, dump context */
155 ctx
->currentIndex
= 64;
158 static void chacha20_encrypt(struct chacha20
*ctx
, unsigned char *blk
, int len
)
161 /* If we don't have any state left, then cycle to the next */
162 if (ctx
->currentIndex
>= 64) {
166 /* Do the xor while there's some state left and some plaintext left */
167 while (ctx
->currentIndex
< 64 && len
) {
168 *blk
++ ^= ctx
->current
[ctx
->currentIndex
++];
174 /* Decrypt is encrypt... It's xor against a PRNG... */
175 static INLINE
void chacha20_decrypt(struct chacha20
*ctx
,
176 unsigned char *blk
, int len
)
178 chacha20_encrypt(ctx
, blk
, len
);
181 /* Poly1305 implementation (no AES, nonce is not encrypted) */
183 #define NWORDS ((130 + BIGNUM_INT_BITS-1) / BIGNUM_INT_BITS)
184 typedef struct bigval
{
188 static void bigval_clear(bigval
*r
)
191 for (i
= 0; i
< NWORDS
; i
++)
195 static void bigval_import_le(bigval
*r
, const void *vdata
, int len
)
197 const unsigned char *data
= (const unsigned char *)vdata
;
200 for (i
= 0; i
< len
; i
++)
201 r
->w
[i
/ BIGNUM_INT_BYTES
] |=
202 (BignumInt
)data
[i
] << (8 * (i
% BIGNUM_INT_BYTES
));
205 static void bigval_export_le(const bigval
*r
, void *vdata
, int len
)
207 unsigned char *data
= (unsigned char *)vdata
;
209 for (i
= 0; i
< len
; i
++)
210 data
[i
] = r
->w
[i
/ BIGNUM_INT_BYTES
] >> (8 * (i
% BIGNUM_INT_BYTES
));
214 * Core functions to do arithmetic mod p = 2^130-5. The whole
215 * collection of these, up to and including the surrounding #if, are
216 * generated automatically for various sizes of BignumInt by
217 * contrib/make1305.py.
220 #if BIGNUM_INT_BITS == 16
222 static void bigval_add(bigval
*r
, const bigval
*a
, const bigval
*b
)
224 BignumInt v0
, v1
, v2
, v3
, v4
, v5
, v6
, v7
, v8
, v9
, v10
, v11
, v12
, v13
, v14
;
225 BignumInt v15
, v16
, v17
, v18
, v19
, v20
, v21
, v22
, v23
, v24
, v25
, v26
;
246 BignumADC(v18
, carry
, v0
, v9
, 0);
247 BignumADC(v19
, carry
, v1
, v10
, carry
);
248 BignumADC(v20
, carry
, v2
, v11
, carry
);
249 BignumADC(v21
, carry
, v3
, v12
, carry
);
250 BignumADC(v22
, carry
, v4
, v13
, carry
);
251 BignumADC(v23
, carry
, v5
, v14
, carry
);
252 BignumADC(v24
, carry
, v6
, v15
, carry
);
253 BignumADC(v25
, carry
, v7
, v16
, carry
);
254 v26
= v8
+ v17
+ carry
;
266 static void bigval_mul_mod_p(bigval
*r
, const bigval
*a
, const bigval
*b
)
268 BignumInt v0
, v1
, v2
, v3
, v4
, v5
, v6
, v7
, v8
, v9
, v10
, v11
, v12
, v13
, v14
;
269 BignumInt v15
, v16
, v17
, v18
, v19
, v20
, v21
, v22
, v23
, v24
, v25
, v26
, v27
;
270 BignumInt v28
, v29
, v30
, v31
, v32
, v33
, v34
, v35
, v36
, v37
, v38
, v39
, v40
;
271 BignumInt v41
, v42
, v43
, v44
, v45
, v46
, v47
, v48
, v49
, v50
, v51
, v52
, v53
;
272 BignumInt v54
, v55
, v56
, v57
, v58
, v59
, v60
, v61
, v62
, v63
, v64
, v65
, v66
;
273 BignumInt v67
, v68
, v69
, v70
, v71
, v72
, v73
, v74
, v75
, v76
, v77
, v78
, v79
;
274 BignumInt v80
, v81
, v82
, v83
, v84
, v85
, v86
, v87
, v88
, v89
, v90
, v91
, v92
;
275 BignumInt v93
, v94
, v95
, v96
, v97
, v98
, v99
, v100
, v101
, v102
, v103
, v104
;
276 BignumInt v105
, v106
, v107
, v108
, v109
, v110
, v111
, v112
, v113
, v114
;
277 BignumInt v115
, v116
, v117
, v118
, v119
, v120
, v121
, v122
, v123
, v124
;
278 BignumInt v125
, v126
, v127
, v128
, v129
, v130
, v131
, v132
, v133
, v134
;
279 BignumInt v135
, v136
, v137
, v138
, v139
, v140
, v141
, v142
, v143
, v144
;
280 BignumInt v145
, v146
, v147
, v148
, v149
, v150
, v151
, v152
, v153
, v154
;
281 BignumInt v155
, v156
, v157
, v158
, v159
, v160
, v161
, v162
, v163
, v164
;
282 BignumInt v165
, v166
, v167
, v168
, v169
, v170
, v171
, v172
, v173
, v174
;
283 BignumInt v175
, v176
, v177
, v178
, v180
, v181
, v182
, v183
, v184
, v185
;
284 BignumInt v186
, v187
, v188
, v189
, v190
, v191
, v192
, v193
, v194
, v195
;
285 BignumInt v196
, v197
, v198
, v199
, v200
, v201
, v202
, v203
, v204
, v205
;
286 BignumInt v206
, v207
, v208
, v210
, v212
, v213
, v214
, v215
, v216
, v217
;
287 BignumInt v218
, v219
, v220
, v221
, v222
, v223
, v224
, v225
, v226
, v227
;
288 BignumInt v228
, v229
;
309 BignumMUL(v19
, v18
, v0
, v9
);
310 BignumMULADD(v21
, v20
, v0
, v10
, v19
);
311 BignumMULADD(v23
, v22
, v0
, v11
, v21
);
312 BignumMULADD(v25
, v24
, v0
, v12
, v23
);
313 BignumMULADD(v27
, v26
, v0
, v13
, v25
);
314 BignumMULADD(v29
, v28
, v0
, v14
, v27
);
315 BignumMULADD(v31
, v30
, v0
, v15
, v29
);
316 BignumMULADD(v33
, v32
, v0
, v16
, v31
);
317 BignumMULADD(v35
, v34
, v0
, v17
, v33
);
318 BignumMULADD(v37
, v36
, v1
, v9
, v20
);
319 BignumMULADD2(v39
, v38
, v1
, v10
, v22
, v37
);
320 BignumMULADD2(v41
, v40
, v1
, v11
, v24
, v39
);
321 BignumMULADD2(v43
, v42
, v1
, v12
, v26
, v41
);
322 BignumMULADD2(v45
, v44
, v1
, v13
, v28
, v43
);
323 BignumMULADD2(v47
, v46
, v1
, v14
, v30
, v45
);
324 BignumMULADD2(v49
, v48
, v1
, v15
, v32
, v47
);
325 BignumMULADD2(v51
, v50
, v1
, v16
, v34
, v49
);
326 BignumMULADD2(v53
, v52
, v1
, v17
, v35
, v51
);
327 BignumMULADD(v55
, v54
, v2
, v9
, v38
);
328 BignumMULADD2(v57
, v56
, v2
, v10
, v40
, v55
);
329 BignumMULADD2(v59
, v58
, v2
, v11
, v42
, v57
);
330 BignumMULADD2(v61
, v60
, v2
, v12
, v44
, v59
);
331 BignumMULADD2(v63
, v62
, v2
, v13
, v46
, v61
);
332 BignumMULADD2(v65
, v64
, v2
, v14
, v48
, v63
);
333 BignumMULADD2(v67
, v66
, v2
, v15
, v50
, v65
);
334 BignumMULADD2(v69
, v68
, v2
, v16
, v52
, v67
);
335 BignumMULADD2(v71
, v70
, v2
, v17
, v53
, v69
);
336 BignumMULADD(v73
, v72
, v3
, v9
, v56
);
337 BignumMULADD2(v75
, v74
, v3
, v10
, v58
, v73
);
338 BignumMULADD2(v77
, v76
, v3
, v11
, v60
, v75
);
339 BignumMULADD2(v79
, v78
, v3
, v12
, v62
, v77
);
340 BignumMULADD2(v81
, v80
, v3
, v13
, v64
, v79
);
341 BignumMULADD2(v83
, v82
, v3
, v14
, v66
, v81
);
342 BignumMULADD2(v85
, v84
, v3
, v15
, v68
, v83
);
343 BignumMULADD2(v87
, v86
, v3
, v16
, v70
, v85
);
344 BignumMULADD2(v89
, v88
, v3
, v17
, v71
, v87
);
345 BignumMULADD(v91
, v90
, v4
, v9
, v74
);
346 BignumMULADD2(v93
, v92
, v4
, v10
, v76
, v91
);
347 BignumMULADD2(v95
, v94
, v4
, v11
, v78
, v93
);
348 BignumMULADD2(v97
, v96
, v4
, v12
, v80
, v95
);
349 BignumMULADD2(v99
, v98
, v4
, v13
, v82
, v97
);
350 BignumMULADD2(v101
, v100
, v4
, v14
, v84
, v99
);
351 BignumMULADD2(v103
, v102
, v4
, v15
, v86
, v101
);
352 BignumMULADD2(v105
, v104
, v4
, v16
, v88
, v103
);
353 BignumMULADD2(v107
, v106
, v4
, v17
, v89
, v105
);
354 BignumMULADD(v109
, v108
, v5
, v9
, v92
);
355 BignumMULADD2(v111
, v110
, v5
, v10
, v94
, v109
);
356 BignumMULADD2(v113
, v112
, v5
, v11
, v96
, v111
);
357 BignumMULADD2(v115
, v114
, v5
, v12
, v98
, v113
);
358 BignumMULADD2(v117
, v116
, v5
, v13
, v100
, v115
);
359 BignumMULADD2(v119
, v118
, v5
, v14
, v102
, v117
);
360 BignumMULADD2(v121
, v120
, v5
, v15
, v104
, v119
);
361 BignumMULADD2(v123
, v122
, v5
, v16
, v106
, v121
);
362 BignumMULADD2(v125
, v124
, v5
, v17
, v107
, v123
);
363 BignumMULADD(v127
, v126
, v6
, v9
, v110
);
364 BignumMULADD2(v129
, v128
, v6
, v10
, v112
, v127
);
365 BignumMULADD2(v131
, v130
, v6
, v11
, v114
, v129
);
366 BignumMULADD2(v133
, v132
, v6
, v12
, v116
, v131
);
367 BignumMULADD2(v135
, v134
, v6
, v13
, v118
, v133
);
368 BignumMULADD2(v137
, v136
, v6
, v14
, v120
, v135
);
369 BignumMULADD2(v139
, v138
, v6
, v15
, v122
, v137
);
370 BignumMULADD2(v141
, v140
, v6
, v16
, v124
, v139
);
371 BignumMULADD2(v143
, v142
, v6
, v17
, v125
, v141
);
372 BignumMULADD(v145
, v144
, v7
, v9
, v128
);
373 BignumMULADD2(v147
, v146
, v7
, v10
, v130
, v145
);
374 BignumMULADD2(v149
, v148
, v7
, v11
, v132
, v147
);
375 BignumMULADD2(v151
, v150
, v7
, v12
, v134
, v149
);
376 BignumMULADD2(v153
, v152
, v7
, v13
, v136
, v151
);
377 BignumMULADD2(v155
, v154
, v7
, v14
, v138
, v153
);
378 BignumMULADD2(v157
, v156
, v7
, v15
, v140
, v155
);
379 BignumMULADD2(v159
, v158
, v7
, v16
, v142
, v157
);
380 BignumMULADD2(v161
, v160
, v7
, v17
, v143
, v159
);
381 BignumMULADD(v163
, v162
, v8
, v9
, v146
);
382 BignumMULADD2(v165
, v164
, v8
, v10
, v148
, v163
);
383 BignumMULADD2(v167
, v166
, v8
, v11
, v150
, v165
);
384 BignumMULADD2(v169
, v168
, v8
, v12
, v152
, v167
);
385 BignumMULADD2(v171
, v170
, v8
, v13
, v154
, v169
);
386 BignumMULADD2(v173
, v172
, v8
, v14
, v156
, v171
);
387 BignumMULADD2(v175
, v174
, v8
, v15
, v158
, v173
);
388 BignumMULADD2(v177
, v176
, v8
, v16
, v160
, v175
);
389 v178
= v8
* v17
+ v161
+ v177
;
390 v180
= (v162
) & ((((BignumInt
)1) << 2)-1);
391 v181
= ((v162
) >> 2) | ((v164
) << 14);
392 v182
= ((v164
) >> 2) | ((v166
) << 14);
393 v183
= ((v166
) >> 2) | ((v168
) << 14);
394 v184
= ((v168
) >> 2) | ((v170
) << 14);
395 v185
= ((v170
) >> 2) | ((v172
) << 14);
396 v186
= ((v172
) >> 2) | ((v174
) << 14);
397 v187
= ((v174
) >> 2) | ((v176
) << 14);
398 v188
= ((v176
) >> 2) | ((v178
) << 14);
400 v190
= (v189
) & ((((BignumInt
)1) << 2)-1);
402 BignumMUL(v193
, v192
, 5, v181
);
403 BignumMULADD(v195
, v194
, 5, v182
, v193
);
404 BignumMULADD(v197
, v196
, 5, v183
, v195
);
405 BignumMULADD(v199
, v198
, 5, v184
, v197
);
406 BignumMULADD(v201
, v200
, 5, v185
, v199
);
407 BignumMULADD(v203
, v202
, 5, v186
, v201
);
408 BignumMULADD(v205
, v204
, 5, v187
, v203
);
409 BignumMULADD(v207
, v206
, 5, v188
, v205
);
410 v208
= 5 * v190
+ v207
;
412 BignumADC(v212
, carry
, v18
, v192
, 0);
413 BignumADC(v213
, carry
, v36
, v194
, carry
);
414 BignumADC(v214
, carry
, v54
, v196
, carry
);
415 BignumADC(v215
, carry
, v72
, v198
, carry
);
416 BignumADC(v216
, carry
, v90
, v200
, carry
);
417 BignumADC(v217
, carry
, v108
, v202
, carry
);
418 BignumADC(v218
, carry
, v126
, v204
, carry
);
419 BignumADC(v219
, carry
, v144
, v206
, carry
);
420 v220
= v180
+ v208
+ carry
;
421 BignumADC(v221
, carry
, v212
, v210
, 0);
422 BignumADC(v222
, carry
, v213
, 0, carry
);
423 BignumADC(v223
, carry
, v214
, 0, carry
);
424 BignumADC(v224
, carry
, v215
, 0, carry
);
425 BignumADC(v225
, carry
, v216
, 0, carry
);
426 BignumADC(v226
, carry
, v217
, 0, carry
);
427 BignumADC(v227
, carry
, v218
, 0, carry
);
428 BignumADC(v228
, carry
, v219
, 0, carry
);
429 v229
= v220
+ 0 + carry
;
441 static void bigval_final_reduce(bigval
*n
)
443 BignumInt v0
, v1
, v2
, v3
, v4
, v5
, v6
, v7
, v8
, v9
, v10
, v11
, v13
, v14
, v15
;
444 BignumInt v16
, v17
, v18
, v19
, v20
, v21
, v22
, v23
, v24
, v25
, v26
, v27
, v28
;
445 BignumInt v29
, v30
, v31
, v32
, v34
, v35
, v36
, v37
, v38
, v39
, v40
, v41
, v42
;
459 v10
= (v8
) & ((((BignumInt
)1) << 2)-1);
461 BignumADC(v13
, carry
, v0
, v11
, 0);
462 BignumADC(v14
, carry
, v1
, 0, carry
);
463 BignumADC(v15
, carry
, v2
, 0, carry
);
464 BignumADC(v16
, carry
, v3
, 0, carry
);
465 BignumADC(v17
, carry
, v4
, 0, carry
);
466 BignumADC(v18
, carry
, v5
, 0, carry
);
467 BignumADC(v19
, carry
, v6
, 0, carry
);
468 BignumADC(v20
, carry
, v7
, 0, carry
);
469 v21
= v10
+ 0 + carry
;
470 BignumADC(v22
, carry
, v13
, 5, 0);
472 BignumADC(v23
, carry
, v14
, 0, carry
);
474 BignumADC(v24
, carry
, v15
, 0, carry
);
476 BignumADC(v25
, carry
, v16
, 0, carry
);
478 BignumADC(v26
, carry
, v17
, 0, carry
);
480 BignumADC(v27
, carry
, v18
, 0, carry
);
482 BignumADC(v28
, carry
, v19
, 0, carry
);
484 BignumADC(v29
, carry
, v20
, 0, carry
);
486 v30
= v21
+ 0 + carry
;
489 BignumADC(v34
, carry
, v13
, v32
, 0);
490 BignumADC(v35
, carry
, v14
, 0, carry
);
491 BignumADC(v36
, carry
, v15
, 0, carry
);
492 BignumADC(v37
, carry
, v16
, 0, carry
);
493 BignumADC(v38
, carry
, v17
, 0, carry
);
494 BignumADC(v39
, carry
, v18
, 0, carry
);
495 BignumADC(v40
, carry
, v19
, 0, carry
);
496 BignumADC(v41
, carry
, v20
, 0, carry
);
497 v42
= v21
+ 0 + carry
;
498 v43
= (v42
) & ((((BignumInt
)1) << 2)-1);
510 #elif BIGNUM_INT_BITS == 32
512 static void bigval_add(bigval
*r
, const bigval
*a
, const bigval
*b
)
514 BignumInt v0
, v1
, v2
, v3
, v4
, v5
, v6
, v7
, v8
, v9
, v10
, v11
, v12
, v13
, v14
;
527 BignumADC(v10
, carry
, v0
, v5
, 0);
528 BignumADC(v11
, carry
, v1
, v6
, carry
);
529 BignumADC(v12
, carry
, v2
, v7
, carry
);
530 BignumADC(v13
, carry
, v3
, v8
, carry
);
531 v14
= v4
+ v9
+ carry
;
539 static void bigval_mul_mod_p(bigval
*r
, const bigval
*a
, const bigval
*b
)
541 BignumInt v0
, v1
, v2
, v3
, v4
, v5
, v6
, v7
, v8
, v9
, v10
, v11
, v12
, v13
, v14
;
542 BignumInt v15
, v16
, v17
, v18
, v19
, v20
, v21
, v22
, v23
, v24
, v25
, v26
, v27
;
543 BignumInt v28
, v29
, v30
, v31
, v32
, v33
, v34
, v35
, v36
, v37
, v38
, v39
, v40
;
544 BignumInt v41
, v42
, v43
, v44
, v45
, v46
, v47
, v48
, v49
, v50
, v51
, v52
, v53
;
545 BignumInt v54
, v55
, v56
, v57
, v58
, v60
, v61
, v62
, v63
, v64
, v65
, v66
, v67
;
546 BignumInt v68
, v69
, v70
, v71
, v72
, v73
, v74
, v75
, v76
, v78
, v80
, v81
, v82
;
547 BignumInt v83
, v84
, v85
, v86
, v87
, v88
, v89
;
560 BignumMUL(v11
, v10
, v0
, v5
);
561 BignumMULADD(v13
, v12
, v0
, v6
, v11
);
562 BignumMULADD(v15
, v14
, v0
, v7
, v13
);
563 BignumMULADD(v17
, v16
, v0
, v8
, v15
);
564 BignumMULADD(v19
, v18
, v0
, v9
, v17
);
565 BignumMULADD(v21
, v20
, v1
, v5
, v12
);
566 BignumMULADD2(v23
, v22
, v1
, v6
, v14
, v21
);
567 BignumMULADD2(v25
, v24
, v1
, v7
, v16
, v23
);
568 BignumMULADD2(v27
, v26
, v1
, v8
, v18
, v25
);
569 BignumMULADD2(v29
, v28
, v1
, v9
, v19
, v27
);
570 BignumMULADD(v31
, v30
, v2
, v5
, v22
);
571 BignumMULADD2(v33
, v32
, v2
, v6
, v24
, v31
);
572 BignumMULADD2(v35
, v34
, v2
, v7
, v26
, v33
);
573 BignumMULADD2(v37
, v36
, v2
, v8
, v28
, v35
);
574 BignumMULADD2(v39
, v38
, v2
, v9
, v29
, v37
);
575 BignumMULADD(v41
, v40
, v3
, v5
, v32
);
576 BignumMULADD2(v43
, v42
, v3
, v6
, v34
, v41
);
577 BignumMULADD2(v45
, v44
, v3
, v7
, v36
, v43
);
578 BignumMULADD2(v47
, v46
, v3
, v8
, v38
, v45
);
579 BignumMULADD2(v49
, v48
, v3
, v9
, v39
, v47
);
580 BignumMULADD(v51
, v50
, v4
, v5
, v42
);
581 BignumMULADD2(v53
, v52
, v4
, v6
, v44
, v51
);
582 BignumMULADD2(v55
, v54
, v4
, v7
, v46
, v53
);
583 BignumMULADD2(v57
, v56
, v4
, v8
, v48
, v55
);
584 v58
= v4
* v9
+ v49
+ v57
;
585 v60
= (v50
) & ((((BignumInt
)1) << 2)-1);
586 v61
= ((v50
) >> 2) | ((v52
) << 30);
587 v62
= ((v52
) >> 2) | ((v54
) << 30);
588 v63
= ((v54
) >> 2) | ((v56
) << 30);
589 v64
= ((v56
) >> 2) | ((v58
) << 30);
591 v66
= (v65
) & ((((BignumInt
)1) << 2)-1);
593 BignumMUL(v69
, v68
, 5, v61
);
594 BignumMULADD(v71
, v70
, 5, v62
, v69
);
595 BignumMULADD(v73
, v72
, 5, v63
, v71
);
596 BignumMULADD(v75
, v74
, 5, v64
, v73
);
599 BignumADC(v80
, carry
, v10
, v68
, 0);
600 BignumADC(v81
, carry
, v20
, v70
, carry
);
601 BignumADC(v82
, carry
, v30
, v72
, carry
);
602 BignumADC(v83
, carry
, v40
, v74
, carry
);
603 v84
= v60
+ v76
+ carry
;
604 BignumADC(v85
, carry
, v80
, v78
, 0);
605 BignumADC(v86
, carry
, v81
, 0, carry
);
606 BignumADC(v87
, carry
, v82
, 0, carry
);
607 BignumADC(v88
, carry
, v83
, 0, carry
);
608 v89
= v84
+ 0 + carry
;
616 static void bigval_final_reduce(bigval
*n
)
618 BignumInt v0
, v1
, v2
, v3
, v4
, v5
, v6
, v7
, v9
, v10
, v11
, v12
, v13
, v14
;
619 BignumInt v15
, v16
, v17
, v18
, v19
, v20
, v22
, v23
, v24
, v25
, v26
, v27
;
628 v6
= (v4
) & ((((BignumInt
)1) << 2)-1);
630 BignumADC(v9
, carry
, v0
, v7
, 0);
631 BignumADC(v10
, carry
, v1
, 0, carry
);
632 BignumADC(v11
, carry
, v2
, 0, carry
);
633 BignumADC(v12
, carry
, v3
, 0, carry
);
634 v13
= v6
+ 0 + carry
;
635 BignumADC(v14
, carry
, v9
, 5, 0);
637 BignumADC(v15
, carry
, v10
, 0, carry
);
639 BignumADC(v16
, carry
, v11
, 0, carry
);
641 BignumADC(v17
, carry
, v12
, 0, carry
);
643 v18
= v13
+ 0 + carry
;
646 BignumADC(v22
, carry
, v9
, v20
, 0);
647 BignumADC(v23
, carry
, v10
, 0, carry
);
648 BignumADC(v24
, carry
, v11
, 0, carry
);
649 BignumADC(v25
, carry
, v12
, 0, carry
);
650 v26
= v13
+ 0 + carry
;
651 v27
= (v26
) & ((((BignumInt
)1) << 2)-1);
659 #elif BIGNUM_INT_BITS == 64
661 static void bigval_add(bigval
*r
, const bigval
*a
, const bigval
*b
)
663 BignumInt v0
, v1
, v2
, v3
, v4
, v5
, v6
, v7
, v8
;
672 BignumADC(v6
, carry
, v0
, v3
, 0);
673 BignumADC(v7
, carry
, v1
, v4
, carry
);
674 v8
= v2
+ v5
+ carry
;
680 static void bigval_mul_mod_p(bigval
*r
, const bigval
*a
, const bigval
*b
)
682 BignumInt v0
, v1
, v2
, v3
, v4
, v5
, v6
, v7
, v8
, v9
, v10
, v11
, v12
, v13
, v14
;
683 BignumInt v15
, v16
, v17
, v18
, v19
, v20
, v21
, v22
, v24
, v25
, v26
, v27
, v28
;
684 BignumInt v29
, v30
, v31
, v32
, v33
, v34
, v36
, v38
, v39
, v40
, v41
, v42
, v43
;
693 BignumMUL(v7
, v6
, v0
, v3
);
694 BignumMULADD(v9
, v8
, v0
, v4
, v7
);
695 BignumMULADD(v11
, v10
, v0
, v5
, v9
);
696 BignumMULADD(v13
, v12
, v1
, v3
, v8
);
697 BignumMULADD2(v15
, v14
, v1
, v4
, v10
, v13
);
698 BignumMULADD2(v17
, v16
, v1
, v5
, v11
, v15
);
699 BignumMULADD(v19
, v18
, v2
, v3
, v14
);
700 BignumMULADD2(v21
, v20
, v2
, v4
, v16
, v19
);
701 v22
= v2
* v5
+ v17
+ v21
;
702 v24
= (v18
) & ((((BignumInt
)1) << 2)-1);
703 v25
= ((v18
) >> 2) | ((v20
) << 62);
704 v26
= ((v20
) >> 2) | ((v22
) << 62);
706 v28
= (v27
) & ((((BignumInt
)1) << 2)-1);
708 BignumMUL(v31
, v30
, 5, v25
);
709 BignumMULADD(v33
, v32
, 5, v26
, v31
);
712 BignumADC(v38
, carry
, v6
, v30
, 0);
713 BignumADC(v39
, carry
, v12
, v32
, carry
);
714 v40
= v24
+ v34
+ carry
;
715 BignumADC(v41
, carry
, v38
, v36
, 0);
716 BignumADC(v42
, carry
, v39
, 0, carry
);
717 v43
= v40
+ 0 + carry
;
723 static void bigval_final_reduce(bigval
*n
)
725 BignumInt v0
, v1
, v2
, v3
, v4
, v5
, v7
, v8
, v9
, v10
, v11
, v12
, v13
, v14
;
726 BignumInt v16
, v17
, v18
, v19
;
733 v4
= (v2
) & ((((BignumInt
)1) << 2)-1);
735 BignumADC(v7
, carry
, v0
, v5
, 0);
736 BignumADC(v8
, carry
, v1
, 0, carry
);
738 BignumADC(v10
, carry
, v7
, 5, 0);
740 BignumADC(v11
, carry
, v8
, 0, carry
);
742 v12
= v9
+ 0 + carry
;
745 BignumADC(v16
, carry
, v7
, v14
, 0);
746 BignumADC(v17
, carry
, v8
, 0, carry
);
747 v18
= v9
+ 0 + carry
;
748 v19
= (v18
) & ((((BignumInt
)1) << 2)-1);
755 #error Add another bit count to contrib/make1305.py and rerun it
759 unsigned char nonce
[16];
763 /* Buffer in case we get less than a multiple of 16 bytes */
764 unsigned char buffer
[16];
768 static void poly1305_init(struct poly1305
*ctx
)
770 memset(ctx
->nonce
, 0, 16);
771 ctx
->bufferIndex
= 0;
772 bigval_clear(&ctx
->h
);
775 /* Takes a 256 bit key */
776 static void poly1305_key(struct poly1305
*ctx
, const unsigned char *key
)
778 unsigned char key_copy
[16];
779 memcpy(key_copy
, key
, 16);
781 /* Key the MAC itself
782 * bytes 4, 8, 12 and 16 are required to have their top four bits clear */
785 key_copy
[11] &= 0x0f;
786 key_copy
[15] &= 0x0f;
787 /* bytes 5, 9 and 13 are required to have their bottom two bits clear */
790 key_copy
[12] &= 0xfc;
791 bigval_import_le(&ctx
->r
, key_copy
, 16);
792 smemclr(key_copy
, sizeof(key_copy
));
794 /* Use the second 128 bits as the nonce */
795 memcpy(ctx
->nonce
, key
+16, 16);
798 /* Feed up to 16 bytes (should only be less for the last chunk) */
799 static void poly1305_feed_chunk(struct poly1305
*ctx
,
800 const unsigned char *chunk
, int len
)
803 bigval_import_le(&c
, chunk
, len
);
804 c
.w
[len
/ BIGNUM_INT_BYTES
] |=
805 (BignumInt
)1 << (8 * (len
% BIGNUM_INT_BYTES
));
806 bigval_add(&c
, &c
, &ctx
->h
);
807 bigval_mul_mod_p(&ctx
->h
, &c
, &ctx
->r
);
810 static void poly1305_feed(struct poly1305
*ctx
,
811 const unsigned char *buf
, int len
)
813 /* Check for stuff left in the buffer from last time */
814 if (ctx
->bufferIndex
) {
815 /* Try to fill up to 16 */
816 while (ctx
->bufferIndex
< 16 && len
) {
817 ctx
->buffer
[ctx
->bufferIndex
++] = *buf
++;
820 if (ctx
->bufferIndex
== 16) {
821 poly1305_feed_chunk(ctx
, ctx
->buffer
, 16);
822 ctx
->bufferIndex
= 0;
826 /* Process 16 byte whole chunks */
828 poly1305_feed_chunk(ctx
, buf
, 16);
833 /* Cache stuff that's left over */
835 memcpy(ctx
->buffer
, buf
, len
);
836 ctx
->bufferIndex
= len
;
840 /* Finalise and populate the buffer with the 16-byte MAC */
841 static void poly1305_finalise(struct poly1305
*ctx
, unsigned char *mac
)
845 if (ctx
->bufferIndex
) {
846 poly1305_feed_chunk(ctx
, ctx
->buffer
, ctx
->bufferIndex
);
849 bigval_import_le(&tmp
, ctx
->nonce
, 16);
850 bigval_final_reduce(&ctx
->h
);
851 bigval_add(&tmp
, &tmp
, &ctx
->h
);
852 bigval_export_le(&tmp
, mac
, 16);
858 struct chacha20 a_cipher
; /* Used for length */
859 struct chacha20 b_cipher
; /* Used for content */
861 /* Cache of the first 4 bytes because they are the sequence number */
862 /* Kept in 8 bytes with the top as zero to allow easy passing to setiv */
863 int mac_initialised
; /* Where we have got to in filling mac_iv */
864 unsigned char mac_iv
[8];
869 static void *poly_make_context(void *ctx
)
874 static void poly_free_context(void *ctx
)
876 /* Not allocated, just forwarded, no need to free */
879 static void poly_setkey(void *ctx
, unsigned char *key
)
881 /* Uses the same context as ChaCha20, so ignore */
884 static void poly_start(void *handle
)
886 struct ccp_context
*ctx
= (struct ccp_context
*)handle
;
888 ctx
->mac_initialised
= 0;
889 memset(ctx
->mac_iv
, 0, 8);
890 poly1305_init(&ctx
->mac
);
893 static void poly_bytes(void *handle
, unsigned char const *blk
, int len
)
895 struct ccp_context
*ctx
= (struct ccp_context
*)handle
;
897 /* First 4 bytes are the IV */
898 while (ctx
->mac_initialised
< 4 && len
) {
899 ctx
->mac_iv
[7 - ctx
->mac_initialised
] = *blk
++;
900 ++ctx
->mac_initialised
;
904 /* Initialise the IV if needed */
905 if (ctx
->mac_initialised
== 4) {
906 chacha20_iv(&ctx
->b_cipher
, ctx
->mac_iv
);
907 ++ctx
->mac_initialised
; /* Don't do it again */
909 /* Do first rotation */
910 chacha20_round(&ctx
->b_cipher
);
912 /* Set the poly key */
913 poly1305_key(&ctx
->mac
, ctx
->b_cipher
.current
);
915 /* Set the first round as used */
916 ctx
->b_cipher
.currentIndex
= 64;
919 /* Update the MAC with anything left */
921 poly1305_feed(&ctx
->mac
, blk
, len
);
925 static void poly_genresult(void *handle
, unsigned char *blk
)
927 struct ccp_context
*ctx
= (struct ccp_context
*)handle
;
928 poly1305_finalise(&ctx
->mac
, blk
);
931 static int poly_verresult(void *handle
, unsigned char const *blk
)
933 struct ccp_context
*ctx
= (struct ccp_context
*)handle
;
935 unsigned char mac
[16];
936 poly1305_finalise(&ctx
->mac
, mac
);
937 res
= smemeq(blk
, mac
, 16);
941 /* The generic poly operation used before generate and verify */
942 static void poly_op(void *handle
, unsigned char *blk
, int len
, unsigned long seq
)
946 PUT_32BIT_MSB_FIRST(iv
, seq
);
947 /* poly_bytes expects the first 4 bytes to be the IV */
948 poly_bytes(handle
, iv
, 4);
949 smemclr(iv
, sizeof(iv
));
950 poly_bytes(handle
, blk
, len
);
953 static void poly_generate(void *handle
, unsigned char *blk
, int len
, unsigned long seq
)
955 poly_op(handle
, blk
, len
, seq
);
956 poly_genresult(handle
, blk
+len
);
959 static int poly_verify(void *handle
, unsigned char *blk
, int len
, unsigned long seq
)
961 poly_op(handle
, blk
, len
, seq
);
962 return poly_verresult(handle
, blk
+len
);
965 static const struct ssh_mac ssh2_poly1305
= {
966 poly_make_context
, poly_free_context
,
969 /* whole-packet operations */
970 poly_generate
, poly_verify
,
972 /* partial-packet operations */
973 poly_start
, poly_bytes
, poly_genresult
, poly_verresult
,
975 "", "", /* Not selectable individually, just part of ChaCha20-Poly1305 */
979 static void *ccp_make_context(void)
981 struct ccp_context
*ctx
= snew(struct ccp_context
);
983 poly1305_init(&ctx
->mac
);
988 static void ccp_free_context(void *vctx
)
990 struct ccp_context
*ctx
= (struct ccp_context
*)vctx
;
991 smemclr(&ctx
->a_cipher
, sizeof(ctx
->a_cipher
));
992 smemclr(&ctx
->b_cipher
, sizeof(ctx
->b_cipher
));
993 smemclr(&ctx
->mac
, sizeof(ctx
->mac
));
997 static void ccp_iv(void *vctx
, unsigned char *iv
)
999 /* struct ccp_context *ctx = (struct ccp_context *)vctx; */
1000 /* IV is set based on the sequence number */
1003 static void ccp_key(void *vctx
, unsigned char *key
)
1005 struct ccp_context
*ctx
= (struct ccp_context
*)vctx
;
1006 /* Initialise the a_cipher (for encrypting/decrypting lengths) with the second 256 bits (key + 32) */
1007 chacha20_key(&ctx
->a_cipher
, key
+ 32);
1008 /* Initialise the b_cipher (for content and MAC) with the first 256 bits */
1009 chacha20_key(&ctx
->b_cipher
, key
);
1012 static void ccp_encrypt(void *vctx
, unsigned char *blk
, int len
)
1014 struct ccp_context
*ctx
= (struct ccp_context
*)vctx
;
1015 chacha20_encrypt(&ctx
->b_cipher
, blk
, len
);
1018 static void ccp_decrypt(void *vctx
, unsigned char *blk
, int len
)
1020 struct ccp_context
*ctx
= (struct ccp_context
*)vctx
;
1021 chacha20_decrypt(&ctx
->b_cipher
, blk
, len
);
1024 static void ccp_length_op(struct ccp_context
*ctx
, unsigned char *blk
, int len
,
1027 unsigned char iv
[8];
1029 * According to RFC 4253 (section 6.4), the packet sequence number wraps
1030 * at 2^32, so its 32 high-order bits will always be zero.
1032 PUT_32BIT_LSB_FIRST(iv
, 0);
1033 PUT_32BIT_LSB_FIRST(iv
+ 4, seq
);
1034 chacha20_iv(&ctx
->a_cipher
, iv
);
1035 chacha20_iv(&ctx
->b_cipher
, iv
);
1036 /* Reset content block count to 1, as the first is the key for Poly1305 */
1037 ++ctx
->b_cipher
.state
[12];
1038 smemclr(iv
, sizeof(iv
));
1041 static void ccp_encrypt_length(void *vctx
, unsigned char *blk
, int len
,
1044 struct ccp_context
*ctx
= (struct ccp_context
*)vctx
;
1045 ccp_length_op(ctx
, blk
, len
, seq
);
1046 chacha20_encrypt(&ctx
->a_cipher
, blk
, len
);
1049 static void ccp_decrypt_length(void *vctx
, unsigned char *blk
, int len
,
1052 struct ccp_context
*ctx
= (struct ccp_context
*)vctx
;
1053 ccp_length_op(ctx
, blk
, len
, seq
);
1054 chacha20_decrypt(&ctx
->a_cipher
, blk
, len
);
1057 static const struct ssh2_cipher ssh2_chacha20_poly1305
= {
1068 "chacha20-poly1305@openssh.com",
1069 1, 512, 64, SSH_CIPHER_SEPARATE_LENGTH
, "ChaCha20",
1074 static const struct ssh2_cipher
*const ccp_list
[] = {
1075 &ssh2_chacha20_poly1305
1078 const struct ssh2_ciphers ssh2_ccp
= {
1079 sizeof(ccp_list
) / sizeof(*ccp_list
),