1 /* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */
/*
Derived from public domain code by D. J. Bernstein.
*/

int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
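
/*
Field elements are stored as 32 little-endian limbs of (nominally) 8 bits each,
representing integers modulo 2^255 - 19.  add() sums two elements limb by limb
with carry propagation; no modular reduction happens here.
*/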
static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
{
  unsigned int j;
  unsigned int u;
  u = 0;
  for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
  u += a[31] + b[31]; out[31] = u;
}
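
/*
sub() computes a - b.  The constant bias (218 in the low limb, 65280 in each
middle limb) adds up to 2*(2^255 - 19), so every intermediate limb stays
non-negative without any data-dependent branching; reduction happens later.
*/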
static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
{
  unsigned int j;
  unsigned int u;
  u = 218;
  for (j = 0;j < 31;++j) {
    u += a[j] + 65280 - b[j];
    out[j] = u & 255;
    u >>= 8;
  }
  u += a[31] - b[31];
  out[31] = u;
}
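
/*
squeeze() carries each limb back down to 8 bits and folds the part above
bit 255 into the low end multiplied by 19, since 2^255 == 19 (mod p).
*/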
static void squeeze(unsigned int a[32])
{
  unsigned int j;
  unsigned int u;
  u = 0;
  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
  u += a[31]; a[31] = u & 127;
  u = 19 * (u >> 7);
  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
  u += a[31]; a[31] = u;
}
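
/*
minusp is -p modulo 2^256 (i.e. 2^255 + 19).  freeze() conditionally subtracts
p by adding minusp and then selecting either the adjusted or the original value
with a mask derived from the top bit, so the reduction to the unique canonical
representative runs in constant time.
*/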
static const unsigned int minusp[32] = {
 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
} ;

static void freeze(unsigned int a[32])
{
  unsigned int aorig[32];
  unsigned int j;
  unsigned int negative;

  for (j = 0;j < 32;++j) aorig[j] = a[j];
  add(a,a,minusp);
  negative = -((a[31] >> 7) & 1);
  for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
}
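
/*
mult() is schoolbook multiplication of two 32-limb elements; product limbs that
would land at position i + 32 are folded back to position i with a factor of
38, since 2^256 == 38 (mod 2^255 - 19).  squeeze() then re-normalizes the limbs.
*/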
static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
{
  unsigned int i;
  unsigned int j;
  unsigned int u;

  for (i = 0;i < 32;++i) {
    u = 0;
    for (j = 0;j <= i;++j) u += a[j] * b[i - j];
    for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
    out[i] = u;
  }
  squeeze(out);
}
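
/*
mult121665() multiplies by the curve constant (A - 2)/4 = (486662 - 2)/4 =
121665 used in the Montgomery-ladder doubling formula.
*/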
static void mult121665(unsigned int out[32],const unsigned int a[32])
{
  unsigned int j;
  unsigned int u;

  u = 0;
  for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
  u += 121665 * a[31]; out[31] = u & 127;
  u = 19 * (u >> 7);
  for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
  u += out[j]; out[j] = u;
}
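
/*
square() is mult() specialized to a == b: each cross term is computed once and
doubled, and the diagonal terms a[i/2]^2 are added only when i is even.
*/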
static void square(unsigned int out[32],const unsigned int a[32])
{
  unsigned int i;
  unsigned int j;
  unsigned int u;

  for (i = 0;i < 32;++i) {
    u = 0;
    for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
    for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
    u *= 2;
    if ((i & 1) == 0) {
      u += a[i / 2] * a[i / 2];
      u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
    }
    out[i] = u;
  }
  squeeze(out);
}
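
/*
select() is a constant-time conditional move: with b = 1, (p,q) = (s,r); with
b = 0, (p,q) = (r,s).  bminus1 is an all-ones or all-zeros mask, so no branch
depends on the secret bit.
*/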
static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
{
  unsigned int j;
  unsigned int t;
  unsigned int bminus1;

  bminus1 = b - 1;
  for (j = 0;j < 64;++j) {
    t = bminus1 & (r[j] ^ s[j]);
    p[j] = s[j] ^ t;
    q[j] = r[j] ^ t;
  }
}
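
/*
mainloop() runs the Montgomery ladder over the 255 scalar bits from the most
significant down.  On entry work[0..31] is the input x-coordinate; on exit
work[0..63] holds the projective X and Z of the result.  Each iteration does
one combined differential add-and-double, with select() keeping the memory
access pattern independent of the secret bits.
*/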
static void mainloop(unsigned int work[64],const unsigned char e[32])
{
  unsigned int xzm1[64];
  unsigned int xzm[64];
  unsigned int xzmb[64];
  unsigned int xzm1b[64];
  unsigned int xznb[64];
  unsigned int xzn1b[64];
  unsigned int a0[64];
  unsigned int a1[64];
  unsigned int b0[64];
  unsigned int b1[64];
  unsigned int c1[64];
  unsigned int r[32];
  unsigned int s[32];
  unsigned int t[32];
  unsigned int u[32];
  unsigned int j;
  unsigned int b;
  int pos;

  for (j = 0;j < 32;++j) xzm1[j] = work[j];
  xzm1[32] = 1;
  for (j = 33;j < 64;++j) xzm1[j] = 0;

  xzm[0] = 1;
  for (j = 1;j < 64;++j) xzm[j] = 0;

  for (pos = 254;pos >= 0;--pos) {
    b = e[pos / 8] >> (pos & 7);
    b &= 1;
    select(xzmb,xzm1b,xzm,xzm1,b);
    add(a0,xzmb,xzmb + 32);
    sub(a0 + 32,xzmb,xzmb + 32);
    add(a1,xzm1b,xzm1b + 32);
    sub(a1 + 32,xzm1b,xzm1b + 32);
    square(b0,a0);
    square(b0 + 32,a0 + 32);
    mult(b1,a1,a0 + 32);
    mult(b1 + 32,a1 + 32,a0);
    add(c1,b1,b1 + 32);
    sub(c1 + 32,b1,b1 + 32);
    square(r,c1 + 32);
    sub(s,b0,b0 + 32);
    mult121665(t,s);
    add(u,t,b0);
    mult(xznb,b0,b0 + 32);
    mult(xznb + 32,s,u);
    square(xzn1b,c1);
    mult(xzn1b + 32,r,work);
    select(xzm,xzm1,xznb,xzn1b,b);
  }

  for (j = 0;j < 64;++j) work[j] = xzm[j];
}
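
/*
recip() computes z^-1 modulo 2^255 - 19 via Fermat's little theorem, raising z
to the power p - 2 = 2^255 - 21 with a fixed square-and-multiply addition
chain; the inline comments track the exponent accumulated so far.
*/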
static void recip(unsigned int out[32],const unsigned int z[32])
{
  unsigned int z2[32];
  unsigned int z9[32];
  unsigned int z11[32];
  unsigned int z2_5_0[32];
  unsigned int z2_10_0[32];
  unsigned int z2_20_0[32];
  unsigned int z2_50_0[32];
  unsigned int z2_100_0[32];
  unsigned int t0[32];
  unsigned int t1[32];
  int i;

  /* 2 */ square(z2,z);
  /* 4 */ square(t1,z2);
  /* 8 */ square(t0,t1);
  /* 9 */ mult(z9,t0,z);
  /* 11 */ mult(z11,z9,z2);
  /* 22 */ square(t0,z11);
  /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);

  /* 2^6 - 2^1 */ square(t0,z2_5_0);
  /* 2^7 - 2^2 */ square(t1,t0);
  /* 2^8 - 2^3 */ square(t0,t1);
  /* 2^9 - 2^4 */ square(t1,t0);
  /* 2^10 - 2^5 */ square(t0,t1);
  /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);

  /* 2^11 - 2^1 */ square(t0,z2_10_0);
  /* 2^12 - 2^2 */ square(t1,t0);
  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
  /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);

  /* 2^21 - 2^1 */ square(t0,z2_20_0);
  /* 2^22 - 2^2 */ square(t1,t0);
  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
  /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);

  /* 2^41 - 2^1 */ square(t1,t0);
  /* 2^42 - 2^2 */ square(t0,t1);
  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
  /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);

  /* 2^51 - 2^1 */ square(t0,z2_50_0);
  /* 2^52 - 2^2 */ square(t1,t0);
  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
  /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);

  /* 2^101 - 2^1 */ square(t1,z2_100_0);
  /* 2^102 - 2^2 */ square(t0,t1);
  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
  /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);

  /* 2^201 - 2^1 */ square(t0,t1);
  /* 2^202 - 2^2 */ square(t1,t0);
  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
  /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);

  /* 2^251 - 2^1 */ square(t1,t0);
  /* 2^252 - 2^2 */ square(t0,t1);
  /* 2^253 - 2^3 */ square(t1,t0);
  /* 2^254 - 2^4 */ square(t0,t1);
  /* 2^255 - 2^5 */ square(t1,t0);
  /* 2^255 - 21 */ mult(out,t1,z11);
}
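
/*
crypto_scalarmult_curve25519(q,n,p): q = n * P, where P is the point with
x-coordinate p.  The scalar n is first clamped as Curve25519 requires (clear
the low 3 bits and the top bit, set bit 254), then the ladder runs, and the
projective result is converted to affine by multiplying X with Z^-1 and frozen
to a canonical 32-byte little-endian string.
*/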
int crypto_scalarmult_curve25519(unsigned char *q,
  const unsigned char *n,
  const unsigned char *p)
{
  unsigned int work[96];
  unsigned char e[32];
  unsigned int i;

  for (i = 0;i < 32;++i) e[i] = n[i];
  e[0] &= 248;
  e[31] &= 127;
  e[31] |= 64;
  for (i = 0;i < 32;++i) work[i] = p[i];
  mainloop(work,e);
  recip(work + 32,work + 32);
  mult(work + 64,work,work + 32);
  freeze(work + 64);
  for (i = 0;i < 32;++i) q[i] = work[64 + i];
  return 0;
}
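
/*
Illustrative usage sketch (not part of this file; the variable names below are
hypothetical): for a Diffie-Hellman exchange, scalar-multiply the standard
base point (x = 9) by a 32-byte secret to obtain a public value, then
scalar-multiply the peer's public value by the same secret to obtain the
shared secret:

    static const unsigned char basepoint[32] = {9};
    unsigned char pub[32], shared[32];
    crypto_scalarmult_curve25519(pub, secret, basepoint);
    crypto_scalarmult_curve25519(shared, secret, peer_pub);
*/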