1 /* Generic ChaCha20 implementation (used on arc4random).
2 Copyright (C) 2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
#include <array_length.h>
#include <endian.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
/* 32-bit stream position, then 96-bit nonce.  */
#define CHACHA20_IV_SIZE	16
#define CHACHA20_KEY_SIZE	32

/* Bytes of keystream produced by one call to chacha20_block: sixteen
   32-bit words.  Guarded so that an identical definition supplied by
   an including file does not clash.  */
#ifndef CHACHA20_BLOCK_SIZE
# define CHACHA20_BLOCK_SIZE	64
#endif

/* Number of 32-bit words in the ChaCha20 state.  */
#define CHACHA20_STATE_LEN	16
/* The ChaCha20 implementation is based on RFC8439 [1], omitting the final
   XOR of the keystream with the plaintext because the plaintext is a
   stream of zeros.

   [1] https://datatracker.ietf.org/doc/html/rfc8439  */
/* The four constant words placed in state[0..3] by chacha20_init.
   Serialised little-endian they spell the ASCII string
   "expand 32-byte k".  */
enum chacha20_constants
{
  CHACHA20_CONSTANT_EXPA = 0x61707865U,	/* "expa" */
  CHACHA20_CONSTANT_ND_3 = 0x3320646eU,	/* "nd 3" */
  CHACHA20_CONSTANT_2_BY = 0x79622d32U,	/* "2-by" */
  CHACHA20_CONSTANT_TE_K = 0x6b206574U	/* "te k" */
};
/* Load a 32-bit word in host byte order from P, which need not be
   suitably aligned for uint32_t.  Going through memcpy avoids both the
   misaligned access and the aliasing violation a pointer cast would
   incur.  */
static inline uint32_t
read_unaligned_32 (const uint8_t *p)
{
  uint32_t r;
  memcpy (&r, p, sizeof (r));
  return r;
}
/* Store the 32-bit word V in host byte order at P, which need not be
   suitably aligned for uint32_t.  */
static inline void
write_unaligned_32 (uint8_t *p, uint32_t v)
{
  memcpy (p, &v, sizeof (v));
}
/* ChaCha20 words are serialised little-endian.  read_unaligned_le32
   loads a little-endian word from a byte pointer into host order;
   set_state converts a host-order word into the value whose in-memory
   bytes are little-endian.  On little-endian hosts both are no-ops.
   __BYTE_ORDER and __BIG_ENDIAN come from <endian.h>.  */
#if __BYTE_ORDER == __BIG_ENDIAN
# define read_unaligned_le32(p) __builtin_bswap32 (read_unaligned_32 (p))
# define set_state(v)		__builtin_bswap32 ((v))
#else
# define read_unaligned_le32(p) read_unaligned_32 ((p))
# define set_state(v)		(v)
#endif
66 chacha20_init (uint32_t *state
, const uint8_t *key
, const uint8_t *iv
)
68 state
[0] = CHACHA20_CONSTANT_EXPA
;
69 state
[1] = CHACHA20_CONSTANT_ND_3
;
70 state
[2] = CHACHA20_CONSTANT_2_BY
;
71 state
[3] = CHACHA20_CONSTANT_TE_K
;
73 state
[4] = read_unaligned_le32 (key
+ 0 * sizeof (uint32_t));
74 state
[5] = read_unaligned_le32 (key
+ 1 * sizeof (uint32_t));
75 state
[6] = read_unaligned_le32 (key
+ 2 * sizeof (uint32_t));
76 state
[7] = read_unaligned_le32 (key
+ 3 * sizeof (uint32_t));
77 state
[8] = read_unaligned_le32 (key
+ 4 * sizeof (uint32_t));
78 state
[9] = read_unaligned_le32 (key
+ 5 * sizeof (uint32_t));
79 state
[10] = read_unaligned_le32 (key
+ 6 * sizeof (uint32_t));
80 state
[11] = read_unaligned_le32 (key
+ 7 * sizeof (uint32_t));
82 state
[12] = read_unaligned_le32 (iv
+ 0 * sizeof (uint32_t));
83 state
[13] = read_unaligned_le32 (iv
+ 1 * sizeof (uint32_t));
84 state
[14] = read_unaligned_le32 (iv
+ 2 * sizeof (uint32_t));
85 state
[15] = read_unaligned_le32 (iv
+ 3 * sizeof (uint32_t));
/* Rotate WORD left by SHIFT bits.  Both shift counts are masked to
   0..31, so a SHIFT of 0 (or any multiple of 32) never produces an
   out-of-range shift and simply returns WORD.  */
static inline uint32_t
rotl32 (unsigned int shift, uint32_t word)
{
  return (word << (shift & 31)) | (word >> ((-shift) & 31));
}
/* Emit one finished 32-bit output word V at DST.  When
   CHACHA20_XOR_FINAL is defined, V is first XORed with the four bytes
   at SRC; otherwise SRC is not accessed.  Neither pointer needs to be
   aligned.  */
static inline void
state_final (const uint8_t *src, uint8_t *dst, uint32_t v)
{
#ifdef CHACHA20_XOR_FINAL
  v ^= read_unaligned_32 (src);
#endif
  write_unaligned_32 (dst, v);
}
/* Produce one 64-byte ChaCha20 block at DST from STATE and advance the
   block counter.

   The sixteen state words are copied into working variables and mixed
   by ten double rounds (a column round followed by a diagonal round).
   The original state is then added back word by word and the sums are
   written little-endian through state_final, which XORs them with SRC
   only when CHACHA20_XOR_FINAL is defined.  Finally STATE[12], the
   32-bit block counter, is incremented so that the next call yields
   the following keystream block.  */
static inline void
chacha20_block (uint32_t *state, uint8_t *dst, const uint8_t *src)
{
  uint32_t x0, x1, x2, x3, x4, x5, x6, x7;
  uint32_t x8, x9, x10, x11, x12, x13, x14, x15;

  x0 = state[0];
  x1 = state[1];
  x2 = state[2];
  x3 = state[3];
  x4 = state[4];
  x5 = state[5];
  x6 = state[6];
  x7 = state[7];
  x8 = state[8];
  x9 = state[9];
  x10 = state[10];
  x11 = state[11];
  x12 = state[12];
  x13 = state[13];
  x14 = state[14];
  x15 = state[15];

  /* Each iteration performs two rounds, hence the step of 2.  */
  for (int i = 0; i < 20; i += 2)
    {
/* One quarter round on four working words.  Wrapped in do/while so
   that each use behaves as a single statement.  */
#define QROUND(_x0, _x1, _x2, _x3)				\
  do {								\
    _x0 = _x0 + _x1; _x3 = rotl32 (16, (_x0 ^ _x3));		\
    _x2 = _x2 + _x3; _x1 = rotl32 (12, (_x1 ^ _x2));		\
    _x0 = _x0 + _x1; _x3 = rotl32 (8, (_x0 ^ _x3));		\
    _x2 = _x2 + _x3; _x1 = rotl32 (7, (_x1 ^ _x2));		\
  } while (0)

      /* Column round.  */
      QROUND (x0, x4, x8, x12);
      QROUND (x1, x5, x9, x13);
      QROUND (x2, x6, x10, x14);
      QROUND (x3, x7, x11, x15);

      /* Diagonal round.  */
      QROUND (x0, x5, x10, x15);
      QROUND (x1, x6, x11, x12);
      QROUND (x2, x7, x8, x13);
      QROUND (x3, x4, x9, x14);
    }

  /* Add the input state back in and serialise the sixteen words.  */
  state_final (&src[0], &dst[0], set_state (x0 + state[0]));
  state_final (&src[4], &dst[4], set_state (x1 + state[1]));
  state_final (&src[8], &dst[8], set_state (x2 + state[2]));
  state_final (&src[12], &dst[12], set_state (x3 + state[3]));
  state_final (&src[16], &dst[16], set_state (x4 + state[4]));
  state_final (&src[20], &dst[20], set_state (x5 + state[5]));
  state_final (&src[24], &dst[24], set_state (x6 + state[6]));
  state_final (&src[28], &dst[28], set_state (x7 + state[7]));
  state_final (&src[32], &dst[32], set_state (x8 + state[8]));
  state_final (&src[36], &dst[36], set_state (x9 + state[9]));
  state_final (&src[40], &dst[40], set_state (x10 + state[10]));
  state_final (&src[44], &dst[44], set_state (x11 + state[11]));
  state_final (&src[48], &dst[48], set_state (x12 + state[12]));
  state_final (&src[52], &dst[52], set_state (x13 + state[13]));
  state_final (&src[56], &dst[56], set_state (x14 + state[14]));
  state_final (&src[60], &dst[60], set_state (x15 + state[15]));

  /* Advance the block counter; chacha20_crypt depends on this to get a
     fresh block on every call.  */
  state[12]++;
}
168 chacha20_crypt (uint32_t *state
, uint8_t *dst
, const uint8_t *src
,
171 while (bytes
>= CHACHA20_BLOCK_SIZE
)
173 chacha20_block (state
, dst
, src
);
175 bytes
-= CHACHA20_BLOCK_SIZE
;
176 dst
+= CHACHA20_BLOCK_SIZE
;
177 src
+= CHACHA20_BLOCK_SIZE
;
180 if (__glibc_unlikely (bytes
!= 0))
182 uint8_t stream
[CHACHA20_BLOCK_SIZE
];
183 chacha20_block (state
, stream
, src
);
184 memcpy (dst
, stream
, bytes
);
185 explicit_bzero (stream
, sizeof stream
);