5 Derived from public domain code by D. J. Bernstein.
12 static void add(unsigned int out
[32],const unsigned int a
[32],const unsigned int b
[32])
17 for (j
= 0;j
< 31;++j
) { u
+= a
[j
] + b
[j
]; out
[j
] = u
& 255; u
>>= 8; }
18 u
+= a
[31] + b
[31]; out
[31] = u
;
21 static void sub(unsigned int out
[32],const unsigned int a
[32],const unsigned int b
[32])
26 for (j
= 0;j
< 31;++j
) {
27 u
+= a
[j
] + 65280 - b
[j
];
35 static void squeeze(unsigned int a
[32])
40 for (j
= 0;j
< 31;++j
) { u
+= a
[j
]; a
[j
] = u
& 255; u
>>= 8; }
41 u
+= a
[31]; a
[31] = u
& 127;
43 for (j
= 0;j
< 31;++j
) { u
+= a
[j
]; a
[j
] = u
& 255; u
>>= 8; }
44 u
+= a
[31]; a
[31] = u
;
47 static const unsigned int minusp
[32] = {
48 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
51 static void freeze(unsigned int a
[32])
53 unsigned int aorig
[32];
55 unsigned int negative
;
57 for (j
= 0;j
< 32;++j
) aorig
[j
] = a
[j
];
59 negative
= -((a
[31] >> 7) & 1);
60 for (j
= 0;j
< 32;++j
) a
[j
] ^= negative
& (aorig
[j
] ^ a
[j
]);
63 static void mult(unsigned int out
[32],const unsigned int a
[32],const unsigned int b
[32])
69 for (i
= 0;i
< 32;++i
) {
71 for (j
= 0;j
<= i
;++j
) u
+= a
[j
] * b
[i
- j
];
72 for (j
= i
+ 1;j
< 32;++j
) u
+= 38 * a
[j
] * b
[i
+ 32 - j
];
78 static void mult121665(unsigned int out
[32],const unsigned int a
[32])
84 for (j
= 0;j
< 31;++j
) { u
+= 121665 * a
[j
]; out
[j
] = u
& 255; u
>>= 8; }
85 u
+= 121665 * a
[31]; out
[31] = u
& 127;
87 for (j
= 0;j
< 31;++j
) { u
+= out
[j
]; out
[j
] = u
& 255; u
>>= 8; }
88 u
+= out
[j
]; out
[j
] = u
;
91 static void square(unsigned int out
[32],const unsigned int a
[32])
97 for (i
= 0;i
< 32;++i
) {
99 for (j
= 0;j
< i
- j
;++j
) u
+= a
[j
] * a
[i
- j
];
100 for (j
= i
+ 1;j
< i
+ 32 - j
;++j
) u
+= 38 * a
[j
] * a
[i
+ 32 - j
];
103 u
+= a
[i
/ 2] * a
[i
/ 2];
104 u
+= 38 * a
[i
/ 2 + 16] * a
[i
/ 2 + 16];
111 static void select(unsigned int p
[64],unsigned int q
[64],const unsigned int r
[64],const unsigned int s
[64],unsigned int b
)
115 unsigned int bminus1
;
118 for (j
= 0;j
< 64;++j
) {
119 t
= bminus1
& (r
[j
] ^ s
[j
]);
125 static void mainloop(unsigned int work
[64],const unsigned char e
[32])
127 unsigned int xzm1
[64];
128 unsigned int xzm
[64];
129 unsigned int xzmb
[64];
130 unsigned int xzm1b
[64];
131 unsigned int xznb
[64];
132 unsigned int xzn1b
[64];
146 for (j
= 0;j
< 32;++j
) xzm1
[j
] = work
[j
];
148 for (j
= 33;j
< 64;++j
) xzm1
[j
] = 0;
151 for (j
= 1;j
< 64;++j
) xzm
[j
] = 0;
153 for (pos
= 254;pos
>= 0;--pos
) {
154 b
= e
[pos
/ 8] >> (pos
& 7);
156 select(xzmb
,xzm1b
,xzm
,xzm1
,b
);
157 add(a0
,xzmb
,xzmb
+ 32);
158 sub(a0
+ 32,xzmb
,xzmb
+ 32);
159 add(a1
,xzm1b
,xzm1b
+ 32);
160 sub(a1
+ 32,xzm1b
,xzm1b
+ 32);
162 square(b0
+ 32,a0
+ 32);
164 mult(b1
+ 32,a1
+ 32,a0
);
166 sub(c1
+ 32,b1
,b1
+ 32);
171 mult(xznb
,b0
,b0
+ 32);
174 mult(xzn1b
+ 32,r
,work
);
175 select(xzm
,xzm1
,xznb
,xzn1b
,b
);
178 for (j
= 0;j
< 64;++j
) work
[j
] = xzm
[j
];
181 static void recip(unsigned int out
[32],const unsigned int z
[32])
185 unsigned int z11
[32];
186 unsigned int z2_5_0
[32];
187 unsigned int z2_10_0
[32];
188 unsigned int z2_20_0
[32];
189 unsigned int z2_50_0
[32];
190 unsigned int z2_100_0
[32];
195 /* 2 */ square(z2
,z
);
196 /* 4 */ square(t1
,z2
);
197 /* 8 */ square(t0
,t1
);
198 /* 9 */ mult(z9
,t0
,z
);
199 /* 11 */ mult(z11
,z9
,z2
);
200 /* 22 */ square(t0
,z11
);
201 /* 2^5 - 2^0 = 31 */ mult(z2_5_0
,t0
,z9
);
203 /* 2^6 - 2^1 */ square(t0
,z2_5_0
);
204 /* 2^7 - 2^2 */ square(t1
,t0
);
205 /* 2^8 - 2^3 */ square(t0
,t1
);
206 /* 2^9 - 2^4 */ square(t1
,t0
);
207 /* 2^10 - 2^5 */ square(t0
,t1
);
208 /* 2^10 - 2^0 */ mult(z2_10_0
,t0
,z2_5_0
);
210 /* 2^11 - 2^1 */ square(t0
,z2_10_0
);
211 /* 2^12 - 2^2 */ square(t1
,t0
);
212 /* 2^20 - 2^10 */ for (i
= 2;i
< 10;i
+= 2) { square(t0
,t1
); square(t1
,t0
); }
213 /* 2^20 - 2^0 */ mult(z2_20_0
,t1
,z2_10_0
);
215 /* 2^21 - 2^1 */ square(t0
,z2_20_0
);
216 /* 2^22 - 2^2 */ square(t1
,t0
);
217 /* 2^40 - 2^20 */ for (i
= 2;i
< 20;i
+= 2) { square(t0
,t1
); square(t1
,t0
); }
218 /* 2^40 - 2^0 */ mult(t0
,t1
,z2_20_0
);
220 /* 2^41 - 2^1 */ square(t1
,t0
);
221 /* 2^42 - 2^2 */ square(t0
,t1
);
222 /* 2^50 - 2^10 */ for (i
= 2;i
< 10;i
+= 2) { square(t1
,t0
); square(t0
,t1
); }
223 /* 2^50 - 2^0 */ mult(z2_50_0
,t0
,z2_10_0
);
225 /* 2^51 - 2^1 */ square(t0
,z2_50_0
);
226 /* 2^52 - 2^2 */ square(t1
,t0
);
227 /* 2^100 - 2^50 */ for (i
= 2;i
< 50;i
+= 2) { square(t0
,t1
); square(t1
,t0
); }
228 /* 2^100 - 2^0 */ mult(z2_100_0
,t1
,z2_50_0
);
230 /* 2^101 - 2^1 */ square(t1
,z2_100_0
);
231 /* 2^102 - 2^2 */ square(t0
,t1
);
232 /* 2^200 - 2^100 */ for (i
= 2;i
< 100;i
+= 2) { square(t1
,t0
); square(t0
,t1
); }
233 /* 2^200 - 2^0 */ mult(t1
,t0
,z2_100_0
);
235 /* 2^201 - 2^1 */ square(t0
,t1
);
236 /* 2^202 - 2^2 */ square(t1
,t0
);
237 /* 2^250 - 2^50 */ for (i
= 2;i
< 50;i
+= 2) { square(t0
,t1
); square(t1
,t0
); }
238 /* 2^250 - 2^0 */ mult(t0
,t1
,z2_50_0
);
240 /* 2^251 - 2^1 */ square(t1
,t0
);
241 /* 2^252 - 2^2 */ square(t0
,t1
);
242 /* 2^253 - 2^3 */ square(t1
,t0
);
243 /* 2^254 - 2^4 */ square(t0
,t1
);
244 /* 2^255 - 2^5 */ square(t1
,t0
);
245 /* 2^255 - 21 */ mult(out
,t1
,z11
);
248 int crypto_scalarmult(unsigned char *q
,
249 const unsigned char *n
,
250 const unsigned char *p
)
252 unsigned int work
[96];
255 for (i
= 0;i
< 32;++i
) e
[i
] = n
[i
];
259 for (i
= 0;i
< 32;++i
) work
[i
] = p
[i
];
261 recip(work
+ 32,work
+ 32);
262 mult(work
+ 64,work
,work
+ 32);
264 for (i
= 0;i
< 32;++i
) q
[i
] = work
[64 + i
];