net: Define # of cpus for network processing.
[dragonfly.git] / crypto / openssh / chacha.c
bloba84c25ea88cea89c6abf879f9597a53246e7403d
1 /*
2 chacha-merged.c version 20080118
3 D. J. Bernstein
4 Public domain.
5 */
7 #include "includes.h"
9 #include "chacha.h"
11 /* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */
/* Short aliases for the byte and word types used by the cipher code. */
typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

/* Attach an unsigned suffix to an integer literal. */
#define U8C(v)	(v##U)
#define U32C(v)	(v##U)

/* Keep only the low 8 bits / low 32 bits of a value. */
#define U8V(v)	((u8)(v) & U8C(0xFF))
#define U32V(v)	((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate v left by n bits within 32 bits.  v is evaluated twice and n must
 * be in the range 1..31, since the right shift uses (32 - n).
 */
#define ROTL32(v, n)						\
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a 32-bit word from the four bytes at p, least significant first. */
#define U8TO32_LITTLE(p)					\
	(((u32)((p)[0])      ) |				\
	 ((u32)((p)[1]) <<  8) |				\
	 ((u32)((p)[2]) << 16) |				\
	 ((u32)((p)[3]) << 24))

/* Store the 32-bit word v into the four bytes at p, least significant first. */
#define U32TO8_LITTLE(p, v)					\
	do {							\
		(p)[0] = U8V((v)      );			\
		(p)[1] = U8V((v) >>  8);			\
		(p)[2] = U8V((v) >> 16);			\
		(p)[3] = U8V((v) >> 24);			\
	} while (0)

/* Word operations; PLUS wraps modulo 2^32. */
#define ROTATE(v,c)	(ROTL32(v,c))
#define XOR(v,w)	((v) ^ (w))
#define PLUS(v,w)	(U32V((v) + (w)))
#define PLUSONE(v)	(PLUS((v),1))

/*
 * Mix four state words in place.  The arguments must be modifiable lvalues.
 * The expansion is a run of plain statements that already ends in a
 * semicolon, so call sites write it without one and it must not be used as
 * the unbraced body of an if/else or loop.
 */
#define QUARTERROUND(a,b,c,d)					\
	a = PLUS(a,b);						\
	d = ROTATE(XOR(d,a),16);				\
	c = PLUS(c,d);						\
	b = ROTATE(XOR(b,c),12);				\
	a = PLUS(a,b);						\
	d = ROTATE(XOR(d,a), 8);				\
	c = PLUS(c,d);						\
	b = ROTATE(XOR(b,c), 7);

/*
 * 16-byte constants loaded into state words 0..3: sigma for 256-bit keys,
 * tau otherwise.  The arrays are exactly 16 bytes, so there is no
 * terminating NUL.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
55 void
56 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
58 const char *constants;
60 x->input[4] = U8TO32_LITTLE(k + 0);
61 x->input[5] = U8TO32_LITTLE(k + 4);
62 x->input[6] = U8TO32_LITTLE(k + 8);
63 x->input[7] = U8TO32_LITTLE(k + 12);
64 if (kbits == 256) { /* recommended */
65 k += 16;
66 constants = sigma;
67 } else { /* kbits == 128 */
68 constants = tau;
70 x->input[8] = U8TO32_LITTLE(k + 0);
71 x->input[9] = U8TO32_LITTLE(k + 4);
72 x->input[10] = U8TO32_LITTLE(k + 8);
73 x->input[11] = U8TO32_LITTLE(k + 12);
74 x->input[0] = U8TO32_LITTLE(constants + 0);
75 x->input[1] = U8TO32_LITTLE(constants + 4);
76 x->input[2] = U8TO32_LITTLE(constants + 8);
77 x->input[3] = U8TO32_LITTLE(constants + 12);
80 void
81 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
83 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
84 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
85 x->input[14] = U8TO32_LITTLE(iv + 0);
86 x->input[15] = U8TO32_LITTLE(iv + 4);
89 void
90 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
92 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
93 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
94 u8 *ctarget = NULL;
95 u8 tmp[64];
96 u_int i;
98 if (!bytes) return;
100 j0 = x->input[0];
101 j1 = x->input[1];
102 j2 = x->input[2];
103 j3 = x->input[3];
104 j4 = x->input[4];
105 j5 = x->input[5];
106 j6 = x->input[6];
107 j7 = x->input[7];
108 j8 = x->input[8];
109 j9 = x->input[9];
110 j10 = x->input[10];
111 j11 = x->input[11];
112 j12 = x->input[12];
113 j13 = x->input[13];
114 j14 = x->input[14];
115 j15 = x->input[15];
117 for (;;) {
118 if (bytes < 64) {
119 for (i = 0;i < bytes;++i) tmp[i] = m[i];
120 m = tmp;
121 ctarget = c;
122 c = tmp;
124 x0 = j0;
125 x1 = j1;
126 x2 = j2;
127 x3 = j3;
128 x4 = j4;
129 x5 = j5;
130 x6 = j6;
131 x7 = j7;
132 x8 = j8;
133 x9 = j9;
134 x10 = j10;
135 x11 = j11;
136 x12 = j12;
137 x13 = j13;
138 x14 = j14;
139 x15 = j15;
140 for (i = 20;i > 0;i -= 2) {
141 QUARTERROUND( x0, x4, x8,x12)
142 QUARTERROUND( x1, x5, x9,x13)
143 QUARTERROUND( x2, x6,x10,x14)
144 QUARTERROUND( x3, x7,x11,x15)
145 QUARTERROUND( x0, x5,x10,x15)
146 QUARTERROUND( x1, x6,x11,x12)
147 QUARTERROUND( x2, x7, x8,x13)
148 QUARTERROUND( x3, x4, x9,x14)
150 x0 = PLUS(x0,j0);
151 x1 = PLUS(x1,j1);
152 x2 = PLUS(x2,j2);
153 x3 = PLUS(x3,j3);
154 x4 = PLUS(x4,j4);
155 x5 = PLUS(x5,j5);
156 x6 = PLUS(x6,j6);
157 x7 = PLUS(x7,j7);
158 x8 = PLUS(x8,j8);
159 x9 = PLUS(x9,j9);
160 x10 = PLUS(x10,j10);
161 x11 = PLUS(x11,j11);
162 x12 = PLUS(x12,j12);
163 x13 = PLUS(x13,j13);
164 x14 = PLUS(x14,j14);
165 x15 = PLUS(x15,j15);
167 x0 = XOR(x0,U8TO32_LITTLE(m + 0));
168 x1 = XOR(x1,U8TO32_LITTLE(m + 4));
169 x2 = XOR(x2,U8TO32_LITTLE(m + 8));
170 x3 = XOR(x3,U8TO32_LITTLE(m + 12));
171 x4 = XOR(x4,U8TO32_LITTLE(m + 16));
172 x5 = XOR(x5,U8TO32_LITTLE(m + 20));
173 x6 = XOR(x6,U8TO32_LITTLE(m + 24));
174 x7 = XOR(x7,U8TO32_LITTLE(m + 28));
175 x8 = XOR(x8,U8TO32_LITTLE(m + 32));
176 x9 = XOR(x9,U8TO32_LITTLE(m + 36));
177 x10 = XOR(x10,U8TO32_LITTLE(m + 40));
178 x11 = XOR(x11,U8TO32_LITTLE(m + 44));
179 x12 = XOR(x12,U8TO32_LITTLE(m + 48));
180 x13 = XOR(x13,U8TO32_LITTLE(m + 52));
181 x14 = XOR(x14,U8TO32_LITTLE(m + 56));
182 x15 = XOR(x15,U8TO32_LITTLE(m + 60));
184 j12 = PLUSONE(j12);
185 if (!j12) {
186 j13 = PLUSONE(j13);
187 /* stopping at 2^70 bytes per nonce is user's responsibility */
190 U32TO8_LITTLE(c + 0,x0);
191 U32TO8_LITTLE(c + 4,x1);
192 U32TO8_LITTLE(c + 8,x2);
193 U32TO8_LITTLE(c + 12,x3);
194 U32TO8_LITTLE(c + 16,x4);
195 U32TO8_LITTLE(c + 20,x5);
196 U32TO8_LITTLE(c + 24,x6);
197 U32TO8_LITTLE(c + 28,x7);
198 U32TO8_LITTLE(c + 32,x8);
199 U32TO8_LITTLE(c + 36,x9);
200 U32TO8_LITTLE(c + 40,x10);
201 U32TO8_LITTLE(c + 44,x11);
202 U32TO8_LITTLE(c + 48,x12);
203 U32TO8_LITTLE(c + 52,x13);
204 U32TO8_LITTLE(c + 56,x14);
205 U32TO8_LITTLE(c + 60,x15);
207 if (bytes <= 64) {
208 if (bytes < 64) {
209 for (i = 0;i < bytes;++i) ctarget[i] = c[i];
211 x->input[12] = j12;
212 x->input[13] = j13;
213 return;
215 bytes -= 64;
216 c += 64;
217 m += 64;