5 Derived from public domain code by D. J. Bernstein.
8 #include "crypto_scalarmult.h"
10 static void add(unsigned int out
[32],const unsigned int a
[32],const unsigned int b
[32])
15 for (j
= 0;j
< 31;++j
) { u
+= a
[j
] + b
[j
]; out
[j
] = u
& 255; u
>>= 8; }
16 u
+= a
[31] + b
[31]; out
[31] = u
;
19 static void sub(unsigned int out
[32],const unsigned int a
[32],const unsigned int b
[32])
24 for (j
= 0;j
< 31;++j
) {
25 u
+= a
[j
] + 65280 - b
[j
];
33 static void squeeze(unsigned int a
[32])
38 for (j
= 0;j
< 31;++j
) { u
+= a
[j
]; a
[j
] = u
& 255; u
>>= 8; }
39 u
+= a
[31]; a
[31] = u
& 127;
41 for (j
= 0;j
< 31;++j
) { u
+= a
[j
]; a
[j
] = u
& 255; u
>>= 8; }
42 u
+= a
[31]; a
[31] = u
;
45 static const unsigned int minusp
[32] = {
46 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
49 static void freeze(unsigned int a
[32])
51 unsigned int aorig
[32];
53 unsigned int negative
;
55 for (j
= 0;j
< 32;++j
) aorig
[j
] = a
[j
];
57 negative
= -((a
[31] >> 7) & 1);
58 for (j
= 0;j
< 32;++j
) a
[j
] ^= negative
& (aorig
[j
] ^ a
[j
]);
61 static void mult(unsigned int out
[32],const unsigned int a
[32],const unsigned int b
[32])
67 for (i
= 0;i
< 32;++i
) {
69 for (j
= 0;j
<= i
;++j
) u
+= a
[j
] * b
[i
- j
];
70 for (j
= i
+ 1;j
< 32;++j
) u
+= 38 * a
[j
] * b
[i
+ 32 - j
];
76 static void mult121665(unsigned int out
[32],const unsigned int a
[32])
82 for (j
= 0;j
< 31;++j
) { u
+= 121665 * a
[j
]; out
[j
] = u
& 255; u
>>= 8; }
83 u
+= 121665 * a
[31]; out
[31] = u
& 127;
85 for (j
= 0;j
< 31;++j
) { u
+= out
[j
]; out
[j
] = u
& 255; u
>>= 8; }
86 u
+= out
[j
]; out
[j
] = u
;
89 static void square(unsigned int out
[32],const unsigned int a
[32])
95 for (i
= 0;i
< 32;++i
) {
97 for (j
= 0;j
< i
- j
;++j
) u
+= a
[j
] * a
[i
- j
];
98 for (j
= i
+ 1;j
< i
+ 32 - j
;++j
) u
+= 38 * a
[j
] * a
[i
+ 32 - j
];
101 u
+= a
[i
/ 2] * a
[i
/ 2];
102 u
+= 38 * a
[i
/ 2 + 16] * a
[i
/ 2 + 16];
109 static void select(unsigned int p
[64],unsigned int q
[64],const unsigned int r
[64],const unsigned int s
[64],unsigned int b
)
113 unsigned int bminus1
;
116 for (j
= 0;j
< 64;++j
) {
117 t
= bminus1
& (r
[j
] ^ s
[j
]);
123 static void mainloop(unsigned int work
[64],const unsigned char e
[32])
125 unsigned int xzm1
[64];
126 unsigned int xzm
[64];
127 unsigned int xzmb
[64];
128 unsigned int xzm1b
[64];
129 unsigned int xznb
[64];
130 unsigned int xzn1b
[64];
145 for (j
= 0;j
< 32;++j
) xzm1
[j
] = work
[j
];
147 for (j
= 33;j
< 64;++j
) xzm1
[j
] = 0;
150 for (j
= 1;j
< 64;++j
) xzm
[j
] = 0;
152 for (pos
= 254;pos
>= 0;--pos
) {
153 b
= e
[pos
/ 8] >> (pos
& 7);
155 select(xzmb
,xzm1b
,xzm
,xzm1
,b
);
156 add(a0
,xzmb
,xzmb
+ 32);
157 sub(a0
+ 32,xzmb
,xzmb
+ 32);
158 add(a1
,xzm1b
,xzm1b
+ 32);
159 sub(a1
+ 32,xzm1b
,xzm1b
+ 32);
161 square(b0
+ 32,a0
+ 32);
163 mult(b1
+ 32,a1
+ 32,a0
);
165 sub(c1
+ 32,b1
,b1
+ 32);
170 mult(xznb
,b0
,b0
+ 32);
173 mult(xzn1b
+ 32,r
,work
);
174 select(xzm
,xzm1
,xznb
,xzn1b
,b
);
177 for (j
= 0;j
< 64;++j
) work
[j
] = xzm
[j
];
180 static void recip(unsigned int out
[32],const unsigned int z
[32])
184 unsigned int z11
[32];
185 unsigned int z2_5_0
[32];
186 unsigned int z2_10_0
[32];
187 unsigned int z2_20_0
[32];
188 unsigned int z2_50_0
[32];
189 unsigned int z2_100_0
[32];
194 /* 2 */ square(z2
,z
);
195 /* 4 */ square(t1
,z2
);
196 /* 8 */ square(t0
,t1
);
197 /* 9 */ mult(z9
,t0
,z
);
198 /* 11 */ mult(z11
,z9
,z2
);
199 /* 22 */ square(t0
,z11
);
200 /* 2^5 - 2^0 = 31 */ mult(z2_5_0
,t0
,z9
);
202 /* 2^6 - 2^1 */ square(t0
,z2_5_0
);
203 /* 2^7 - 2^2 */ square(t1
,t0
);
204 /* 2^8 - 2^3 */ square(t0
,t1
);
205 /* 2^9 - 2^4 */ square(t1
,t0
);
206 /* 2^10 - 2^5 */ square(t0
,t1
);
207 /* 2^10 - 2^0 */ mult(z2_10_0
,t0
,z2_5_0
);
209 /* 2^11 - 2^1 */ square(t0
,z2_10_0
);
210 /* 2^12 - 2^2 */ square(t1
,t0
);
211 /* 2^20 - 2^10 */ for (i
= 2;i
< 10;i
+= 2) { square(t0
,t1
); square(t1
,t0
); }
212 /* 2^20 - 2^0 */ mult(z2_20_0
,t1
,z2_10_0
);
214 /* 2^21 - 2^1 */ square(t0
,z2_20_0
);
215 /* 2^22 - 2^2 */ square(t1
,t0
);
216 /* 2^40 - 2^20 */ for (i
= 2;i
< 20;i
+= 2) { square(t0
,t1
); square(t1
,t0
); }
217 /* 2^40 - 2^0 */ mult(t0
,t1
,z2_20_0
);
219 /* 2^41 - 2^1 */ square(t1
,t0
);
220 /* 2^42 - 2^2 */ square(t0
,t1
);
221 /* 2^50 - 2^10 */ for (i
= 2;i
< 10;i
+= 2) { square(t1
,t0
); square(t0
,t1
); }
222 /* 2^50 - 2^0 */ mult(z2_50_0
,t0
,z2_10_0
);
224 /* 2^51 - 2^1 */ square(t0
,z2_50_0
);
225 /* 2^52 - 2^2 */ square(t1
,t0
);
226 /* 2^100 - 2^50 */ for (i
= 2;i
< 50;i
+= 2) { square(t0
,t1
); square(t1
,t0
); }
227 /* 2^100 - 2^0 */ mult(z2_100_0
,t1
,z2_50_0
);
229 /* 2^101 - 2^1 */ square(t1
,z2_100_0
);
230 /* 2^102 - 2^2 */ square(t0
,t1
);
231 /* 2^200 - 2^100 */ for (i
= 2;i
< 100;i
+= 2) { square(t1
,t0
); square(t0
,t1
); }
232 /* 2^200 - 2^0 */ mult(t1
,t0
,z2_100_0
);
234 /* 2^201 - 2^1 */ square(t0
,t1
);
235 /* 2^202 - 2^2 */ square(t1
,t0
);
236 /* 2^250 - 2^50 */ for (i
= 2;i
< 50;i
+= 2) { square(t0
,t1
); square(t1
,t0
); }
237 /* 2^250 - 2^0 */ mult(t0
,t1
,z2_50_0
);
239 /* 2^251 - 2^1 */ square(t1
,t0
);
240 /* 2^252 - 2^2 */ square(t0
,t1
);
241 /* 2^253 - 2^3 */ square(t1
,t0
);
242 /* 2^254 - 2^4 */ square(t0
,t1
);
243 /* 2^255 - 2^5 */ square(t1
,t0
);
244 /* 2^255 - 21 */ mult(out
,t1
,z11
);
247 int crypto_scalarmult(unsigned char *q
,
248 const unsigned char *n
,
249 const unsigned char *p
)
251 unsigned int work
[96];
254 for (i
= 0;i
< 32;++i
) e
[i
] = n
[i
];
258 for (i
= 0;i
< 32;++i
) work
[i
] = p
[i
];
260 recip(work
+ 32,work
+ 32);
261 mult(work
+ 64,work
,work
+ 32);
263 for (i
= 0;i
< 32;++i
) q
[i
] = work
[64 + i
];