Bitcoin Core 28.0.0
P2P Digital Currency
Loading...
Searching...
No Matches
field_10x26_impl.h
Go to the documentation of this file.
1/***********************************************************************
2 * Copyright (c) 2013, 2014 Pieter Wuille *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5 ***********************************************************************/
6
7#ifndef SECP256K1_FIELD_REPR_IMPL_H
8#define SECP256K1_FIELD_REPR_IMPL_H
9
10#include "checkmem.h"
11#include "util.h"
12#include "field.h"
13#include "modinv32_impl.h"
14
#ifdef VERIFY
/** Check the representation invariants of a field element in 10x26 form.
 *
 * Limbs n[0]..n[8] are nominally 26 bits wide and n[9] is nominally 22 bits
 * wide. Every limb must not exceed its full-width mask multiplied by a bound
 * factor, which is 1 when a->normalized is set and 2 * a->magnitude otherwise.
 *
 * For a normalized element one more condition is checked: when n[9] and
 * n[2]..n[8] are all ones, adding 0x3D1 to n[0] and 0x40 to n[1] (plus the
 * carry out of n[0]) must still fit in 26 bits. This is the same expression
 * the normalization code uses to detect a value that needs a final reduction.
 *
 * In: a: field element to check; n, normalized and magnitude are read.
 */
static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
    const uint32_t *d = a->n;
    /* Per-limb bound factor, see the function comment. */
    int m = a->normalized ? 1 : 2 * a->magnitude;
    VERIFY_CHECK(d[0] <= 0x3FFFFFFUL * m);
    VERIFY_CHECK(d[1] <= 0x3FFFFFFUL * m);
    VERIFY_CHECK(d[2] <= 0x3FFFFFFUL * m);
    VERIFY_CHECK(d[3] <= 0x3FFFFFFUL * m);
    VERIFY_CHECK(d[4] <= 0x3FFFFFFUL * m);
    VERIFY_CHECK(d[5] <= 0x3FFFFFFUL * m);
    VERIFY_CHECK(d[6] <= 0x3FFFFFFUL * m);
    VERIFY_CHECK(d[7] <= 0x3FFFFFFUL * m);
    VERIFY_CHECK(d[8] <= 0x3FFFFFFUL * m);
    /* The top limb only carries 22 bits. */
    VERIFY_CHECK(d[9] <= 0x03FFFFFUL * m);
    if (a->normalized) {
        if (d[9] == 0x03FFFFFUL) {
            /* All-ones test for the middle limbs in a single comparison. */
            uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
            if (mid == 0x3FFFFFFUL) {
                VERIFY_CHECK((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL);
            }
        }
    }
}
#endif
39
/* NOTE(review): the function header for this body is absent from this listing
 * (the embedded line numbers skip 40), so the declarations of r and m are not
 * visible here; presumably this is the "get bounds" routine - confirm against
 * field.h.
 *
 * Sets every limb of r to 2 * m times that limb's full-width mask:
 * the 26-bit mask 0x3FFFFFF for limbs 0..8 and the 22-bit mask 0x03FFFFF for
 * limb 9. Only r->n is written in the visible lines. */
41 r->n[0] = 0x3FFFFFFUL * 2 * m;
42 r->n[1] = 0x3FFFFFFUL * 2 * m;
43 r->n[2] = 0x3FFFFFFUL * 2 * m;
44 r->n[3] = 0x3FFFFFFUL * 2 * m;
45 r->n[4] = 0x3FFFFFFUL * 2 * m;
46 r->n[5] = 0x3FFFFFFUL * 2 * m;
47 r->n[6] = 0x3FFFFFFUL * 2 * m;
48 r->n[7] = 0x3FFFFFFUL * 2 * m;
49 r->n[8] = 0x3FFFFFFUL * 2 * m;
50 r->n[9] = 0x03FFFFFUL * 2 * m;
51}
52
/* NOTE(review): the function header for this body is absent from this listing
 * (the embedded line numbers skip 53); presumably this is the constant-time
 * full normalization routine - confirm against field.h.
 *
 * Works on local copies of the ten limbs of r. Everything at or above bit 22
 * of limb 9 is folded back into limbs 0 and 1 (x * 0x3D1 and x << 6), carries
 * are propagated once, and then a second reduction pass is executed
 * unconditionally with x equal to 0 or 1. The visible code contains no branch
 * on limb values. The limbs are written back to r->n at the end. */
54 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
55 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
56
57 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
58 uint32_t m;
59 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
60
61 /* The first pass ensures the magnitude is 1, ... */
/* m accumulates the AND of limbs 2..8 so that "all ones" can be tested once below. */
62 t0 += x * 0x3D1UL; t1 += (x << 6);
63 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
64 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
65 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
66 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
67 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
68 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
69 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
70 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
71 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
72
73 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
74 VERIFY_CHECK(t9 >> 23 == 0);
75
76 /* At most a single final reduction is needed; check if the value is >= the field characteristic */
/* x becomes 1 if bit 22 of t9 is set, or if the limbs are all ones from limb 2
 * upwards and adding 0x3D1 / 0x40 to limbs 0 / 1 would carry out of limb 1. */
77 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
78 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
79
80 /* Apply the final reduction (for constant-time behaviour, we do it always) */
81 t0 += x * 0x3D1UL; t1 += (x << 6);
82 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
83 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
84 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
85 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
86 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
87 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
88 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
89 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
90 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
91
92 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
93 VERIFY_CHECK(t9 >> 22 == x);
94
95 /* Mask off the possible multiple of 2^256 from the final reduction */
96 t9 &= 0x03FFFFFUL;
97
98 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
99 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
100}
101
/* NOTE(review): the function header for this body is absent from this listing
 * (the embedded line numbers skip 102); presumably this is the "weak"
 * normalization routine - confirm against field.h.
 *
 * Performs only the first reduction pass: the bits at or above bit 22 of limb 9
 * are folded into limbs 0 and 1 and carries are propagated once. No comparison
 * against the field characteristic and no second pass are performed, so limb 9
 * may still have bit 22 set when the limbs are written back. */
103 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
104 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
105
106 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
107 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
108
109 /* The first pass ensures the magnitude is 1, ... */
110 t0 += x * 0x3D1UL; t1 += (x << 6);
111 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
112 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
113 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
114 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
115 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
116 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
117 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
118 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
119 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
120
121 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
122 VERIFY_CHECK(t9 >> 23 == 0);
123
124 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
125 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
126}
127
/* NOTE(review): the function header for this body is absent from this listing
 * (the embedded line numbers skip 128); presumably this is the variable-time
 * full normalization routine - confirm against field.h.
 *
 * Same first pass and same "needs final reduction" test as the always-reducing
 * variant, but the second pass is executed only when x is non-zero, so the
 * running time depends on the value being normalized. */
129 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
130 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
131
132 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
133 uint32_t m;
134 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
135
136 /* The first pass ensures the magnitude is 1, ... */
/* m accumulates the AND of limbs 2..8 for the all-ones test below. */
137 t0 += x * 0x3D1UL; t1 += (x << 6);
138 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
139 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
140 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
141 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
142 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
143 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
144 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
145 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
146 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
147
148 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
149 VERIFY_CHECK(t9 >> 23 == 0);
150
151 /* At most a single final reduction is needed; check if the value is >= the field characteristic */
152 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
153 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
154
/* Inside this branch x is 1 (it is 0 or 1 given the check on t9 above), so
 * adding the bare constant 0x3D1 matches x * 0x3D1 in the branch-free variant. */
155 if (x) {
156 t0 += 0x3D1UL; t1 += (x << 6);
157 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
158 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
159 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
160 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
161 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
162 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
163 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
164 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
165 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
166
167 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
168 VERIFY_CHECK(t9 >> 22 == x);
169
170 /* Mask off the possible multiple of 2^256 from the final reduction */
171 t9 &= 0x03FFFFFUL;
172 }
173
174 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
175 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
176}
177
/* NOTE(review): the function header for this body is absent from this listing
 * (the embedded line numbers skip 178); presumably this is the branch-free
 * "normalizes to zero" predicate - confirm against field.h.
 *
 * Runs the first reduction pass on local copies of the limbs (r->n is only
 * read) and returns 1 if the resulting raw limbs are either all zero or equal
 * to the limb pattern tracked by z1, and 0 otherwise. */
179 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
180 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
181
182 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
183 uint32_t z0, z1;
184
185 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
186 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
187
188 /* The first pass ensures the magnitude is 1, ... */
/* z0 is the OR of all limbs (zero only if every limb is zero). z1 is the AND
 * of all limbs after XOR-ing in the bits where the P pattern is not all ones
 * (0x3D0 in limb 0, 0x40 in limb 1, the four bits above bit 21 in limb 9), so
 * z1 == 0x3FFFFFF only if every limb matches that pattern. */
189 t0 += x * 0x3D1UL; t1 += (x << 6);
190 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL;
191 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
192 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
193 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
194 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
195 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
196 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
197 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
198 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
199 z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
200
201 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
202 VERIFY_CHECK(t9 >> 23 == 0);
203
204 return (z0 == 0) | (z1 == 0x3FFFFFFUL);
205}
206
/* NOTE(review): the function header for this body is absent from this listing
 * (the embedded line numbers skip 207); presumably this is the variable-time
 * "normalizes to zero" predicate - confirm against field.h.
 *
 * Computes the same result as the full carry-chain test below, but first looks
 * only at limb 0 (after folding in the overflow of limb 9) and returns 0
 * immediately when that limb alone rules out both candidate values. Only in
 * the remaining cases are limbs 1..8 loaded and the full chain evaluated.
 * r->n is only read. */
208 uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
209 uint32_t z0, z1;
210 uint32_t x;
211
212 t0 = r->n[0];
213 t9 = r->n[9];
214
215 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
216 x = t9 >> 22;
217
218 /* The first pass ensures the magnitude is 1, ... */
219 t0 += x * 0x3D1UL;
220
221 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
222 z0 = t0 & 0x3FFFFFFUL;
223 z1 = z0 ^ 0x3D0UL;
224
225 /* Fast return path should catch the majority of cases */
226 if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
227 return 0;
228 }
229
230 t1 = r->n[1];
231 t2 = r->n[2];
232 t3 = r->n[3];
233 t4 = r->n[4];
234 t5 = r->n[5];
235 t6 = r->n[6];
236 t7 = r->n[7];
237 t8 = r->n[8];
238
239 t9 &= 0x03FFFFFUL;
240 t1 += (x << 6);
241
/* t0 is deliberately not masked here: only its carry (t0 >> 26) is still
 * needed, its low 26 bits were already captured in z0 / z1 above. */
242 t1 += (t0 >> 26);
243 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
244 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
245 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
246 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
247 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
248 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
249 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
250 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
251 z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
252
253 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
254 VERIFY_CHECK(t9 >> 23 == 0);
255
256 return (z0 == 0) | (z1 == 0x3FFFFFFUL);
257}
258
/* NOTE(review): function header absent from this listing (embedded line
 * numbers skip 259), so the types of r and a are not visible here.
 * Stores a in limb 0 and clears limbs 1..9 of r. */
260 r->n[0] = a;
261 r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
262}
263
/* NOTE(review): function header absent from this listing (embedded line
 * numbers skip 264).
 * Returns 1 if all ten raw limbs of a are zero and 0 otherwise. No reduction
 * is done here, so a non-reduced representation of zero is not detected. */
265 const uint32_t *t = a->n;
266 return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
267}
268
/* NOTE(review): function header absent from this listing (embedded line
 * numbers skip 269).
 * Returns the least significant bit of limb 0 of a (1 or 0). */
270 return a->n[0] & 1;
271}
272
/* NOTE(review): function header absent from this listing (embedded line
 * numbers skip 273).
 * Sets all ten limbs of a to zero. Only a->n is touched in the visible lines. */
274 int i;
275 for (i=0; i<10; i++) {
276 a->n[i] = 0;
277 }
278}
279
/* NOTE(review): function header absent from this listing (embedded line
 * numbers skip 280).
 * Compares a and b limb by limb, starting at the most significant limb
 * (index 9), and returns at the first difference: 1 if a's limb is larger,
 * -1 if it is smaller. Returns 0 if all ten limbs are equal. The raw limbs are
 * compared, so the result is only an ordering of the represented values when
 * both inputs are in a canonical form - presumably callers normalize first. */
281 int i;
282 for (i = 9; i >= 0; i--) {
283 if (a->n[i] > b->n[i]) {
284 return 1;
285 }
286 if (a->n[i] < b->n[i]) {
287 return -1;
288 }
289 }
290 return 0;
291}
292
293static void secp256k1_fe_impl_set_b32_mod(secp256k1_fe *r, const unsigned char *a) {
294 r->n[0] = (uint32_t)a[31] | ((uint32_t)a[30] << 8) | ((uint32_t)a[29] << 16) | ((uint32_t)(a[28] & 0x3) << 24);
295 r->n[1] = (uint32_t)((a[28] >> 2) & 0x3f) | ((uint32_t)a[27] << 6) | ((uint32_t)a[26] << 14) | ((uint32_t)(a[25] & 0xf) << 22);
296 r->n[2] = (uint32_t)((a[25] >> 4) & 0xf) | ((uint32_t)a[24] << 4) | ((uint32_t)a[23] << 12) | ((uint32_t)(a[22] & 0x3f) << 20);
297 r->n[3] = (uint32_t)((a[22] >> 6) & 0x3) | ((uint32_t)a[21] << 2) | ((uint32_t)a[20] << 10) | ((uint32_t)a[19] << 18);
298 r->n[4] = (uint32_t)a[18] | ((uint32_t)a[17] << 8) | ((uint32_t)a[16] << 16) | ((uint32_t)(a[15] & 0x3) << 24);
299 r->n[5] = (uint32_t)((a[15] >> 2) & 0x3f) | ((uint32_t)a[14] << 6) | ((uint32_t)a[13] << 14) | ((uint32_t)(a[12] & 0xf) << 22);
300 r->n[6] = (uint32_t)((a[12] >> 4) & 0xf) | ((uint32_t)a[11] << 4) | ((uint32_t)a[10] << 12) | ((uint32_t)(a[9] & 0x3f) << 20);
301 r->n[7] = (uint32_t)((a[9] >> 6) & 0x3) | ((uint32_t)a[8] << 2) | ((uint32_t)a[7] << 10) | ((uint32_t)a[6] << 18);
302 r->n[8] = (uint32_t)a[5] | ((uint32_t)a[4] << 8) | ((uint32_t)a[3] << 16) | ((uint32_t)(a[2] & 0x3) << 24);
303 r->n[9] = (uint32_t)((a[2] >> 2) & 0x3f) | ((uint32_t)a[1] << 6) | ((uint32_t)a[0] << 14);
304}
305
306static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char *a) {
308 return !((r->n[9] == 0x3FFFFFUL) & ((r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL) & ((r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
309}
310
312static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a) {
313 r[0] = (a->n[9] >> 14) & 0xff;
314 r[1] = (a->n[9] >> 6) & 0xff;
315 r[2] = ((a->n[9] & 0x3F) << 2) | ((a->n[8] >> 24) & 0x3);
316 r[3] = (a->n[8] >> 16) & 0xff;
317 r[4] = (a->n[8] >> 8) & 0xff;
318 r[5] = a->n[8] & 0xff;
319 r[6] = (a->n[7] >> 18) & 0xff;
320 r[7] = (a->n[7] >> 10) & 0xff;
321 r[8] = (a->n[7] >> 2) & 0xff;
322 r[9] = ((a->n[7] & 0x3) << 6) | ((a->n[6] >> 20) & 0x3f);
323 r[10] = (a->n[6] >> 12) & 0xff;
324 r[11] = (a->n[6] >> 4) & 0xff;
325 r[12] = ((a->n[6] & 0xf) << 4) | ((a->n[5] >> 22) & 0xf);
326 r[13] = (a->n[5] >> 14) & 0xff;
327 r[14] = (a->n[5] >> 6) & 0xff;
328 r[15] = ((a->n[5] & 0x3f) << 2) | ((a->n[4] >> 24) & 0x3);
329 r[16] = (a->n[4] >> 16) & 0xff;
330 r[17] = (a->n[4] >> 8) & 0xff;
331 r[18] = a->n[4] & 0xff;
332 r[19] = (a->n[3] >> 18) & 0xff;
333 r[20] = (a->n[3] >> 10) & 0xff;
334 r[21] = (a->n[3] >> 2) & 0xff;
335 r[22] = ((a->n[3] & 0x3) << 6) | ((a->n[2] >> 20) & 0x3f);
336 r[23] = (a->n[2] >> 12) & 0xff;
337 r[24] = (a->n[2] >> 4) & 0xff;
338 r[25] = ((a->n[2] & 0xf) << 4) | ((a->n[1] >> 22) & 0xf);
339 r[26] = (a->n[1] >> 14) & 0xff;
340 r[27] = (a->n[1] >> 6) & 0xff;
341 r[28] = ((a->n[1] & 0x3f) << 2) | ((a->n[0] >> 24) & 0x3);
342 r[29] = (a->n[0] >> 16) & 0xff;
343 r[30] = (a->n[0] >> 8) & 0xff;
344 r[31] = a->n[0] & 0xff;
345}
346
/* NOTE(review): function header absent from this listing (embedded line
 * numbers skip 347), so the declarations of r, a and m are not visible here.
 *
 * Computes each limb of r as 2 * (m + 1) times a per-limb constant, minus the
 * matching limb of a. The constants 0x3FFFC2F, 0x3FFFFBF, 0x3FFFFFF (x7) and
 * 0x03FFFFF are the ten 26/26/.../22-bit limbs of 2^256 - 0x1000003D1, so the
 * result represents 2 * (m + 1) times that value minus a. */
348 /* For all legal values of m (0..31), the following properties hold: */
349 VERIFY_CHECK(0x3FFFC2FUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
350 VERIFY_CHECK(0x3FFFFBFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
351 VERIFY_CHECK(0x3FFFFFFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
352 VERIFY_CHECK(0x03FFFFFUL * 2 * (m + 1) >= 0x03FFFFFUL * 2 * m);
353
354 /* Due to the properties above, the left hand in the subtractions below is never less than
355 * the right hand. */
356 r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
357 r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
358 r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
359 r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
360 r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
361 r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
362 r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
363 r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
364 r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
365 r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
366}
367
/* NOTE(review): function header absent from this listing (embedded line
 * numbers skip 368), so the types of r and a are not visible here.
 * Multiplies each of the ten limbs of r by a, in place. No carry propagation
 * or overflow check is done in the visible lines. */
369 r->n[0] *= a;
370 r->n[1] *= a;
371 r->n[2] *= a;
372 r->n[3] *= a;
373 r->n[4] *= a;
374 r->n[5] *= a;
375 r->n[6] *= a;
376 r->n[7] *= a;
377 r->n[8] *= a;
378 r->n[9] *= a;
379}
380
/* NOTE(review): function header absent from this listing (embedded line
 * numbers skip 381).
 * Adds each limb of a to the matching limb of r, in place. No carry
 * propagation or reduction is done in the visible lines. */
382 r->n[0] += a->n[0];
383 r->n[1] += a->n[1];
384 r->n[2] += a->n[2];
385 r->n[3] += a->n[3];
386 r->n[4] += a->n[4];
387 r->n[5] += a->n[5];
388 r->n[6] += a->n[6];
389 r->n[7] += a->n[7];
390 r->n[8] += a->n[8];
391 r->n[9] += a->n[9];
392}
393
/* NOTE(review): function header absent from this listing (embedded line
 * numbers skip 394), so the types of r and a are not visible here.
 * Adds a to limb 0 of r; the other limbs are left untouched and no carry is
 * propagated. */
395 r->n[0] += a;
396}
397
398#if defined(USE_EXTERNAL_ASM)
399
400/* External assembler implementation */
/* Prototypes only: when USE_EXTERNAL_ASM is defined these two routines are
 * declared here without a body, and the C definitions in the #else branch
 * are not compiled. Both operate on raw uint32_t limb arrays; r is the output. */
401void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b);
402void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);
403
404#else
405
/* Assert, via VERIFY_CHECK, that x fits in n bits (i.e. x >> n is zero).
 * Whether the check is compiled in depends on VERIFY_CHECK, which is defined
 * elsewhere. n must be smaller than the bit width of x's type for the shift
 * to be well defined. */
406#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
407
/* Multiply two field elements given as ten-limb arrays and store a partially
 * reduced product in r.
 *
 * In:  a, b: ten limbs each. The VERIFY_BITS checks at the top require limbs
 *            0..8 to fit in 30 bits and limb 9 to fit in 26 bits. b is
 *            declared with SECP256K1_RESTRICT.
 * Out: r:    ten limbs. Per the VERIFY_BITS checks at the end, r[0], r[1] and
 *            r[3]..r[8] fit in 26 bits, r[2] fits in 27 bits and r[9] fits in
 *            22 bits.
 *
 * Two 64-bit accumulators are used: d collects the high product columns (p9
 * and above) and c collects the low columns (p0..p8). Each time 26 bits (u)
 * are taken off d they are folded into c as u * R0 in the current column and
 * u * R1 in the next one. The first store to r[] (r[3]) comes after the last
 * read of a[] and b[] (the a[9] * b[9] term).
 *
 * The statement order matters: every VERIFY_BITS / VERIFY_CHECK bound below
 * holds only for the accumulator contents at that exact point. */
408SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
409 uint64_t c, d;
410 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
411 uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
/* M is the 26-bit limb mask. R1 * 2^26 + R0 = 0x1000003D10 = 16 * 0x1000003D1;
 * presumably the reduction of 2^260 (the weight of an eleventh limb) modulo the
 * field prime 2^256 - 0x1000003D1. */
412 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
413
414 VERIFY_BITS(a[0], 30);
415 VERIFY_BITS(a[1], 30);
416 VERIFY_BITS(a[2], 30);
417 VERIFY_BITS(a[3], 30);
418 VERIFY_BITS(a[4], 30);
419 VERIFY_BITS(a[5], 30);
420 VERIFY_BITS(a[6], 30);
421 VERIFY_BITS(a[7], 30);
422 VERIFY_BITS(a[8], 30);
423 VERIFY_BITS(a[9], 26);
424 VERIFY_BITS(b[0], 30);
425 VERIFY_BITS(b[1], 30);
426 VERIFY_BITS(b[2], 30);
427 VERIFY_BITS(b[3], 30);
428 VERIFY_BITS(b[4], 30);
429 VERIFY_BITS(b[5], 30);
430 VERIFY_BITS(b[6], 30);
431 VERIFY_BITS(b[7], 30);
432 VERIFY_BITS(b[8], 30);
433 VERIFY_BITS(b[9], 26);
434
/* NOTE(review): the embedded line numbers skip 435-440 here, so a comment
 * defining the notation used below may have been dropped from this listing.
 * From the code, [... a b c] lists base-2^26 columns with the least
 * significant on the right, and px stands for the sum of all a[i] * b[j] with
 * i + j == x. Both sides of each "=" appear to be meant modulo the field
 * prime - confirm against the upstream source. */
441 d = (uint64_t)a[0] * b[9]
442 + (uint64_t)a[1] * b[8]
443 + (uint64_t)a[2] * b[7]
444 + (uint64_t)a[3] * b[6]
445 + (uint64_t)a[4] * b[5]
446 + (uint64_t)a[5] * b[4]
447 + (uint64_t)a[6] * b[3]
448 + (uint64_t)a[7] * b[2]
449 + (uint64_t)a[8] * b[1]
450 + (uint64_t)a[9] * b[0];
451 /* VERIFY_BITS(d, 64); */
452 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
453 t9 = d & M; d >>= 26;
454 VERIFY_BITS(t9, 26);
455 VERIFY_BITS(d, 38);
456 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
457
458 c = (uint64_t)a[0] * b[0];
459 VERIFY_BITS(c, 60);
460 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
461 d += (uint64_t)a[1] * b[9]
462 + (uint64_t)a[2] * b[8]
463 + (uint64_t)a[3] * b[7]
464 + (uint64_t)a[4] * b[6]
465 + (uint64_t)a[5] * b[5]
466 + (uint64_t)a[6] * b[4]
467 + (uint64_t)a[7] * b[3]
468 + (uint64_t)a[8] * b[2]
469 + (uint64_t)a[9] * b[1];
470 VERIFY_BITS(d, 63);
471 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
472 u0 = d & M; d >>= 26; c += u0 * R0;
473 VERIFY_BITS(u0, 26);
474 VERIFY_BITS(d, 37);
475 VERIFY_BITS(c, 61);
476 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
477 t0 = c & M; c >>= 26; c += u0 * R1;
478 VERIFY_BITS(t0, 26);
479 VERIFY_BITS(c, 37);
480 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
481 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
482
483 c += (uint64_t)a[0] * b[1]
484 + (uint64_t)a[1] * b[0];
485 VERIFY_BITS(c, 62);
486 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
487 d += (uint64_t)a[2] * b[9]
488 + (uint64_t)a[3] * b[8]
489 + (uint64_t)a[4] * b[7]
490 + (uint64_t)a[5] * b[6]
491 + (uint64_t)a[6] * b[5]
492 + (uint64_t)a[7] * b[4]
493 + (uint64_t)a[8] * b[3]
494 + (uint64_t)a[9] * b[2];
495 VERIFY_BITS(d, 63);
496 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
497 u1 = d & M; d >>= 26; c += u1 * R0;
498 VERIFY_BITS(u1, 26);
499 VERIFY_BITS(d, 37);
500 VERIFY_BITS(c, 63);
501 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
502 t1 = c & M; c >>= 26; c += u1 * R1;
503 VERIFY_BITS(t1, 26);
504 VERIFY_BITS(c, 38);
505 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
506 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
507
508 c += (uint64_t)a[0] * b[2]
509 + (uint64_t)a[1] * b[1]
510 + (uint64_t)a[2] * b[0];
511 VERIFY_BITS(c, 62);
512 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
513 d += (uint64_t)a[3] * b[9]
514 + (uint64_t)a[4] * b[8]
515 + (uint64_t)a[5] * b[7]
516 + (uint64_t)a[6] * b[6]
517 + (uint64_t)a[7] * b[5]
518 + (uint64_t)a[8] * b[4]
519 + (uint64_t)a[9] * b[3];
520 VERIFY_BITS(d, 63);
521 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
522 u2 = d & M; d >>= 26; c += u2 * R0;
523 VERIFY_BITS(u2, 26);
524 VERIFY_BITS(d, 37);
525 VERIFY_BITS(c, 63);
526 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
527 t2 = c & M; c >>= 26; c += u2 * R1;
528 VERIFY_BITS(t2, 26);
529 VERIFY_BITS(c, 38);
530 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
531 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
532
533 c += (uint64_t)a[0] * b[3]
534 + (uint64_t)a[1] * b[2]
535 + (uint64_t)a[2] * b[1]
536 + (uint64_t)a[3] * b[0];
537 VERIFY_BITS(c, 63);
538 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
539 d += (uint64_t)a[4] * b[9]
540 + (uint64_t)a[5] * b[8]
541 + (uint64_t)a[6] * b[7]
542 + (uint64_t)a[7] * b[6]
543 + (uint64_t)a[8] * b[5]
544 + (uint64_t)a[9] * b[4];
545 VERIFY_BITS(d, 63);
546 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
547 u3 = d & M; d >>= 26; c += u3 * R0;
548 VERIFY_BITS(u3, 26);
549 VERIFY_BITS(d, 37);
550 /* VERIFY_BITS(c, 64); */
551 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
552 t3 = c & M; c >>= 26; c += u3 * R1;
553 VERIFY_BITS(t3, 26);
554 VERIFY_BITS(c, 39);
555 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
556 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
557
558 c += (uint64_t)a[0] * b[4]
559 + (uint64_t)a[1] * b[3]
560 + (uint64_t)a[2] * b[2]
561 + (uint64_t)a[3] * b[1]
562 + (uint64_t)a[4] * b[0];
563 VERIFY_BITS(c, 63);
564 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
565 d += (uint64_t)a[5] * b[9]
566 + (uint64_t)a[6] * b[8]
567 + (uint64_t)a[7] * b[7]
568 + (uint64_t)a[8] * b[6]
569 + (uint64_t)a[9] * b[5];
570 VERIFY_BITS(d, 62);
571 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
572 u4 = d & M; d >>= 26; c += u4 * R0;
573 VERIFY_BITS(u4, 26);
574 VERIFY_BITS(d, 36);
575 /* VERIFY_BITS(c, 64); */
576 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
577 t4 = c & M; c >>= 26; c += u4 * R1;
578 VERIFY_BITS(t4, 26);
579 VERIFY_BITS(c, 39);
580 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
581 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
582
583 c += (uint64_t)a[0] * b[5]
584 + (uint64_t)a[1] * b[4]
585 + (uint64_t)a[2] * b[3]
586 + (uint64_t)a[3] * b[2]
587 + (uint64_t)a[4] * b[1]
588 + (uint64_t)a[5] * b[0];
589 VERIFY_BITS(c, 63);
590 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
591 d += (uint64_t)a[6] * b[9]
592 + (uint64_t)a[7] * b[8]
593 + (uint64_t)a[8] * b[7]
594 + (uint64_t)a[9] * b[6];
595 VERIFY_BITS(d, 62);
596 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
597 u5 = d & M; d >>= 26; c += u5 * R0;
598 VERIFY_BITS(u5, 26);
599 VERIFY_BITS(d, 36);
600 /* VERIFY_BITS(c, 64); */
601 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
602 t5 = c & M; c >>= 26; c += u5 * R1;
603 VERIFY_BITS(t5, 26);
604 VERIFY_BITS(c, 39);
605 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
606 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
607
608 c += (uint64_t)a[0] * b[6]
609 + (uint64_t)a[1] * b[5]
610 + (uint64_t)a[2] * b[4]
611 + (uint64_t)a[3] * b[3]
612 + (uint64_t)a[4] * b[2]
613 + (uint64_t)a[5] * b[1]
614 + (uint64_t)a[6] * b[0];
615 VERIFY_BITS(c, 63);
616 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
617 d += (uint64_t)a[7] * b[9]
618 + (uint64_t)a[8] * b[8]
619 + (uint64_t)a[9] * b[7];
620 VERIFY_BITS(d, 61);
621 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
622 u6 = d & M; d >>= 26; c += u6 * R0;
623 VERIFY_BITS(u6, 26);
624 VERIFY_BITS(d, 35);
625 /* VERIFY_BITS(c, 64); */
626 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
627 t6 = c & M; c >>= 26; c += u6 * R1;
628 VERIFY_BITS(t6, 26);
629 VERIFY_BITS(c, 39);
630 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
631 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
632
633 c += (uint64_t)a[0] * b[7]
634 + (uint64_t)a[1] * b[6]
635 + (uint64_t)a[2] * b[5]
636 + (uint64_t)a[3] * b[4]
637 + (uint64_t)a[4] * b[3]
638 + (uint64_t)a[5] * b[2]
639 + (uint64_t)a[6] * b[1]
640 + (uint64_t)a[7] * b[0];
641 /* VERIFY_BITS(c, 64); */
/* From here on c may use all 64 bits, so explicit upper bounds replace the bit-count checks. */
642 VERIFY_CHECK(c <= 0x8000007C00000007ULL);
643 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
644 d += (uint64_t)a[8] * b[9]
645 + (uint64_t)a[9] * b[8];
646 VERIFY_BITS(d, 58);
647 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
648 u7 = d & M; d >>= 26; c += u7 * R0;
649 VERIFY_BITS(u7, 26);
650 VERIFY_BITS(d, 32);
651 /* VERIFY_BITS(c, 64); */
652 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
653 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
654 t7 = c & M; c >>= 26; c += u7 * R1;
655 VERIFY_BITS(t7, 26);
656 VERIFY_BITS(c, 38);
657 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
658 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
659
660 c += (uint64_t)a[0] * b[8]
661 + (uint64_t)a[1] * b[7]
662 + (uint64_t)a[2] * b[6]
663 + (uint64_t)a[3] * b[5]
664 + (uint64_t)a[4] * b[4]
665 + (uint64_t)a[5] * b[3]
666 + (uint64_t)a[6] * b[2]
667 + (uint64_t)a[7] * b[1]
668 + (uint64_t)a[8] * b[0];
669 /* VERIFY_BITS(c, 64); */
670 VERIFY_CHECK(c <= 0x9000007B80000008ULL);
671 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
672 d += (uint64_t)a[9] * b[9];
673 VERIFY_BITS(d, 57);
674 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
675 u8 = d & M; d >>= 26; c += u8 * R0;
676 VERIFY_BITS(u8, 26);
677 VERIFY_BITS(d, 31);
678 /* VERIFY_BITS(c, 64); */
679 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
680 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
681
/* All reads of a[] and b[] are done; results can now be stored to r[]. */
682 r[3] = t3;
683 VERIFY_BITS(r[3], 26);
684 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
685 r[4] = t4;
686 VERIFY_BITS(r[4], 26);
687 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
688 r[5] = t5;
689 VERIFY_BITS(r[5], 26);
690 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
691 r[6] = t6;
692 VERIFY_BITS(r[6], 26);
693 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
694 r[7] = t7;
695 VERIFY_BITS(r[7], 26);
696 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
697
698 r[8] = c & M; c >>= 26; c += u8 * R1;
699 VERIFY_BITS(r[8], 26);
700 VERIFY_BITS(c, 39);
701 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
702 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
703 c += d * R0 + t9;
704 VERIFY_BITS(c, 45);
705 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
/* The top limb keeps only 22 bits (M >> 4); the R1 term is scaled by 16
 * (R1 << 4) to match the 22-bit shift. */
706 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
707 VERIFY_BITS(r[9], 22);
708 VERIFY_BITS(c, 46);
709 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
710 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
711 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
712
/* Fold the excess above bit 22 of the top limb back into limbs 0 and 1:
 * R0 >> 4 is 0x3D1 and R1 >> 4 is 0x40. */
713 d = c * (R0 >> 4) + t0;
714 VERIFY_BITS(d, 56);
715 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
716 r[0] = d & M; d >>= 26;
717 VERIFY_BITS(r[0], 26);
718 VERIFY_BITS(d, 30);
719 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
720 d += c * (R1 >> 4) + t1;
721 VERIFY_BITS(d, 53);
722 VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
723 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
724 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
725 r[1] = d & M; d >>= 26;
726 VERIFY_BITS(r[1], 26);
727 VERIFY_BITS(d, 27);
728 VERIFY_CHECK(d <= 0x4000000ULL);
729 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
730 d += t2;
731 VERIFY_BITS(d, 27);
732 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
/* r[2] is stored without masking, so it may occupy 27 bits. */
733 r[2] = d;
734 VERIFY_BITS(r[2], 27);
735 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
736}
737
738SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
739 uint64_t c, d;
740 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
741 uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
742 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
743
744 VERIFY_BITS(a[0], 30);
745 VERIFY_BITS(a[1], 30);
746 VERIFY_BITS(a[2], 30);
747 VERIFY_BITS(a[3], 30);
748 VERIFY_BITS(a[4], 30);
749 VERIFY_BITS(a[5], 30);
750 VERIFY_BITS(a[6], 30);
751 VERIFY_BITS(a[7], 30);
752 VERIFY_BITS(a[8], 30);
753 VERIFY_BITS(a[9], 26);
754
760 d = (uint64_t)(a[0]*2) * a[9]
761 + (uint64_t)(a[1]*2) * a[8]
762 + (uint64_t)(a[2]*2) * a[7]
763 + (uint64_t)(a[3]*2) * a[6]
764 + (uint64_t)(a[4]*2) * a[5];
765 /* VERIFY_BITS(d, 64); */
766 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
767 t9 = d & M; d >>= 26;
768 VERIFY_BITS(t9, 26);
769 VERIFY_BITS(d, 38);
770 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
771
772 c = (uint64_t)a[0] * a[0];
773 VERIFY_BITS(c, 60);
774 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
775 d += (uint64_t)(a[1]*2) * a[9]
776 + (uint64_t)(a[2]*2) * a[8]
777 + (uint64_t)(a[3]*2) * a[7]
778 + (uint64_t)(a[4]*2) * a[6]
779 + (uint64_t)a[5] * a[5];
780 VERIFY_BITS(d, 63);
781 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
782 u0 = d & M; d >>= 26; c += u0 * R0;
783 VERIFY_BITS(u0, 26);
784 VERIFY_BITS(d, 37);
785 VERIFY_BITS(c, 61);
786 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
787 t0 = c & M; c >>= 26; c += u0 * R1;
788 VERIFY_BITS(t0, 26);
789 VERIFY_BITS(c, 37);
790 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
791 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
792
793 c += (uint64_t)(a[0]*2) * a[1];
794 VERIFY_BITS(c, 62);
795 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
796 d += (uint64_t)(a[2]*2) * a[9]
797 + (uint64_t)(a[3]*2) * a[8]
798 + (uint64_t)(a[4]*2) * a[7]
799 + (uint64_t)(a[5]*2) * a[6];
800 VERIFY_BITS(d, 63);
801 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
802 u1 = d & M; d >>= 26; c += u1 * R0;
803 VERIFY_BITS(u1, 26);
804 VERIFY_BITS(d, 37);
805 VERIFY_BITS(c, 63);
806 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
807 t1 = c & M; c >>= 26; c += u1 * R1;
808 VERIFY_BITS(t1, 26);
809 VERIFY_BITS(c, 38);
810 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
811 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
812
813 c += (uint64_t)(a[0]*2) * a[2]
814 + (uint64_t)a[1] * a[1];
815 VERIFY_BITS(c, 62);
816 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
817 d += (uint64_t)(a[3]*2) * a[9]
818 + (uint64_t)(a[4]*2) * a[8]
819 + (uint64_t)(a[5]*2) * a[7]
820 + (uint64_t)a[6] * a[6];
821 VERIFY_BITS(d, 63);
822 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
823 u2 = d & M; d >>= 26; c += u2 * R0;
824 VERIFY_BITS(u2, 26);
825 VERIFY_BITS(d, 37);
826 VERIFY_BITS(c, 63);
827 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
828 t2 = c & M; c >>= 26; c += u2 * R1;
829 VERIFY_BITS(t2, 26);
830 VERIFY_BITS(c, 38);
831 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
832 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
833
834 c += (uint64_t)(a[0]*2) * a[3]
835 + (uint64_t)(a[1]*2) * a[2];
836 VERIFY_BITS(c, 63);
837 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
838 d += (uint64_t)(a[4]*2) * a[9]
839 + (uint64_t)(a[5]*2) * a[8]
840 + (uint64_t)(a[6]*2) * a[7];
841 VERIFY_BITS(d, 63);
842 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
843 u3 = d & M; d >>= 26; c += u3 * R0;
844 VERIFY_BITS(u3, 26);
845 VERIFY_BITS(d, 37);
846 /* VERIFY_BITS(c, 64); */
847 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
848 t3 = c & M; c >>= 26; c += u3 * R1;
849 VERIFY_BITS(t3, 26);
850 VERIFY_BITS(c, 39);
851 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
852 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
853
854 c += (uint64_t)(a[0]*2) * a[4]
855 + (uint64_t)(a[1]*2) * a[3]
856 + (uint64_t)a[2] * a[2];
857 VERIFY_BITS(c, 63);
858 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
859 d += (uint64_t)(a[5]*2) * a[9]
860 + (uint64_t)(a[6]*2) * a[8]
861 + (uint64_t)a[7] * a[7];
862 VERIFY_BITS(d, 62);
863 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
864 u4 = d & M; d >>= 26; c += u4 * R0;
865 VERIFY_BITS(u4, 26);
866 VERIFY_BITS(d, 36);
867 /* VERIFY_BITS(c, 64); */
868 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
869 t4 = c & M; c >>= 26; c += u4 * R1;
870 VERIFY_BITS(t4, 26);
871 VERIFY_BITS(c, 39);
872 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
873 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
874
875 c += (uint64_t)(a[0]*2) * a[5]
876 + (uint64_t)(a[1]*2) * a[4]
877 + (uint64_t)(a[2]*2) * a[3];
878 VERIFY_BITS(c, 63);
879 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
880 d += (uint64_t)(a[6]*2) * a[9]
881 + (uint64_t)(a[7]*2) * a[8];
882 VERIFY_BITS(d, 62);
883 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
884 u5 = d & M; d >>= 26; c += u5 * R0;
885 VERIFY_BITS(u5, 26);
886 VERIFY_BITS(d, 36);
887 /* VERIFY_BITS(c, 64); */
888 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
889 t5 = c & M; c >>= 26; c += u5 * R1;
890 VERIFY_BITS(t5, 26);
891 VERIFY_BITS(c, 39);
892 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
893 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
894
895 c += (uint64_t)(a[0]*2) * a[6]
896 + (uint64_t)(a[1]*2) * a[5]
897 + (uint64_t)(a[2]*2) * a[4]
898 + (uint64_t)a[3] * a[3];
899 VERIFY_BITS(c, 63);
900 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
901 d += (uint64_t)(a[7]*2) * a[9]
902 + (uint64_t)a[8] * a[8];
903 VERIFY_BITS(d, 61);
904 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
905 u6 = d & M; d >>= 26; c += u6 * R0;
906 VERIFY_BITS(u6, 26);
907 VERIFY_BITS(d, 35);
908 /* VERIFY_BITS(c, 64); */
909 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
910 t6 = c & M; c >>= 26; c += u6 * R1;
911 VERIFY_BITS(t6, 26);
912 VERIFY_BITS(c, 39);
913 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
914 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
915
916 c += (uint64_t)(a[0]*2) * a[7]
917 + (uint64_t)(a[1]*2) * a[6]
918 + (uint64_t)(a[2]*2) * a[5]
919 + (uint64_t)(a[3]*2) * a[4];
920 /* VERIFY_BITS(c, 64); */
921 VERIFY_CHECK(c <= 0x8000007C00000007ULL);
922 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
923 d += (uint64_t)(a[8]*2) * a[9];
924 VERIFY_BITS(d, 58);
925 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
926 u7 = d & M; d >>= 26; c += u7 * R0;
927 VERIFY_BITS(u7, 26);
928 VERIFY_BITS(d, 32);
929 /* VERIFY_BITS(c, 64); */
930 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
931 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
932 t7 = c & M; c >>= 26; c += u7 * R1;
933 VERIFY_BITS(t7, 26);
934 VERIFY_BITS(c, 38);
935 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
936 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
937
938 c += (uint64_t)(a[0]*2) * a[8]
939 + (uint64_t)(a[1]*2) * a[7]
940 + (uint64_t)(a[2]*2) * a[6]
941 + (uint64_t)(a[3]*2) * a[5]
942 + (uint64_t)a[4] * a[4];
943 /* VERIFY_BITS(c, 64); */
944 VERIFY_CHECK(c <= 0x9000007B80000008ULL);
945 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
946 d += (uint64_t)a[9] * a[9];
947 VERIFY_BITS(d, 57);
948 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
949 u8 = d & M; d >>= 26; c += u8 * R0;
950 VERIFY_BITS(u8, 26);
951 VERIFY_BITS(d, 31);
952 /* VERIFY_BITS(c, 64); */
953 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
954 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
955
956 r[3] = t3;
957 VERIFY_BITS(r[3], 26);
958 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
959 r[4] = t4;
960 VERIFY_BITS(r[4], 26);
961 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
962 r[5] = t5;
963 VERIFY_BITS(r[5], 26);
964 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
965 r[6] = t6;
966 VERIFY_BITS(r[6], 26);
967 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
968 r[7] = t7;
969 VERIFY_BITS(r[7], 26);
970 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
971
972 r[8] = c & M; c >>= 26; c += u8 * R1;
973 VERIFY_BITS(r[8], 26);
974 VERIFY_BITS(c, 39);
975 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
976 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
977 c += d * R0 + t9;
978 VERIFY_BITS(c, 45);
979 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
980 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
981 VERIFY_BITS(r[9], 22);
982 VERIFY_BITS(c, 46);
983 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
984 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
985 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
986
987 d = c * (R0 >> 4) + t0;
988 VERIFY_BITS(d, 56);
989 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
990 r[0] = d & M; d >>= 26;
991 VERIFY_BITS(r[0], 26);
992 VERIFY_BITS(d, 30);
993 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
994 d += c * (R1 >> 4) + t1;
995 VERIFY_BITS(d, 53);
996 VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
997 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
998 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
999 r[1] = d & M; d >>= 26;
1000 VERIFY_BITS(r[1], 26);
1001 VERIFY_BITS(d, 27);
1002 VERIFY_CHECK(d <= 0x4000000ULL);
1003 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1004 d += t2;
1005 VERIFY_BITS(d, 27);
1006 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1007 r[2] = d;
1008 VERIFY_BITS(r[2], 27);
1009 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1010}
1011#endif
1012
1016
1020
/* Body of the conditional move for a 10-limb field element
 * (secp256k1_fe_impl_cmov(r, a, flag) according to the symbol index of this listing;
 * NOTE(review): the signature line itself is not present in this listing - confirm).
 *
 * flag == 0: mask0 is all ones and mask1 is zero, so r->n[0..9] are left unchanged.
 * flag == 1: mask0 is zero and mask1 is all ones, so r->n[0..9] become a->n[0..9].
 * Any other flag value would give mixed masks, so callers presumably pass only 0 or 1.
 * The selection is done purely with AND/OR masks; there is no branch on flag.
 */
1022 uint32_t mask0, mask1;
/* flag is read through a volatile object; presumably to discourage the compiler from
 * converting the masking below into a branch - TODO confirm intent. */
1023 volatile int vflag = flag;
/* NOTE(review): macro comes from checkmem.h; presumably verifies that r->n holds
 * defined memory in VERIFY builds - confirm. */
1024 SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n));
/* vflag + 0xFFFFFFFF equals vflag - 1 modulo 2^32. */
1025 mask0 = vflag + ~((uint32_t)0);
1026 mask1 = ~mask0;
/* Per limb: keep r where mask0 is set, take a where mask1 is set. */
1027 r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
1028 r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
1029 r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
1030 r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
1031 r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
1032 r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
1033 r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
1034 r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
1035 r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
1036 r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
1037}
1038
/* Body of the field-element halving routine (secp256k1_fe_impl_half(r) according to the
 * symbol index of this listing; NOTE(review): the signature line is not present here - confirm).
 *
 * Works in place on r->n[0..9]. If the lowest limb is odd, the constant
 * 2^256 - 0x1000003D1 (split into 26-bit limbs) is added limb by limb so that the lowest
 * limb becomes even; afterwards the whole limb array is shifted right by one bit, the low
 * bit of each limb moving into bit 25 of the limb below it.
 * No carries are propagated between limbs; the bounds comments below track how large
 * each limb can become.
 */
1040 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
1041 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
1042 uint32_t one = (uint32_t)1;
/* -(t0 & 1) is 0 or 0xFFFFFFFF in uint32_t arithmetic, so mask is 0 when t0 is even
 * and 0x3FFFFFF (26 one bits) when t0 is odd. */
1043 uint32_t mask = -(t0 & one) >> 6;
1044
1045 /* Bounds analysis (over the rationals).
1046 *
1047 * Let m = r->magnitude
1048 * C = 0x3FFFFFFUL * 2
1049 * D = 0x03FFFFFUL * 2
1050 *
1051 * Initial bounds: t0..t8 <= C * m
1052 * t9 <= D * m
1053 */
1054
/* Conditionally add the limbs of 2^256 - 0x1000003D1: 0x3FFFC2F, 0x3FFFFBF, then
 * 0x3FFFFFF for limbs 2..8 and 0x3FFFFF (mask >> 4) for the top limb. */
1055 t0 += 0x3FFFC2FUL & mask;
1056 t1 += 0x3FFFFBFUL & mask;
1057 t2 += mask;
1058 t3 += mask;
1059 t4 += mask;
1060 t5 += mask;
1061 t6 += mask;
1062 t7 += mask;
1063 t8 += mask;
1064 t9 += mask >> 4;
1065
/* 0x3FFFC2F is odd, so an odd t0 has become even; an even t0 was left untouched. */
1066 VERIFY_CHECK((t0 & one) == 0);
1067
1068 /* t0..t8: added <= C/2
1069 * t9: added <= D/2
1070 *
1071 * Current bounds: t0..t8 <= C * (m + 1/2)
1072 * t9 <= D * (m + 1/2)
1073 */
1074
/* Shift the multi-limb value right by one bit: each limb is halved and receives the
 * lowest bit of the next higher limb at bit position 25. */
1075 r->n[0] = (t0 >> 1) + ((t1 & one) << 25);
1076 r->n[1] = (t1 >> 1) + ((t2 & one) << 25);
1077 r->n[2] = (t2 >> 1) + ((t3 & one) << 25);
1078 r->n[3] = (t3 >> 1) + ((t4 & one) << 25);
1079 r->n[4] = (t4 >> 1) + ((t5 & one) << 25);
1080 r->n[5] = (t5 >> 1) + ((t6 & one) << 25);
1081 r->n[6] = (t6 >> 1) + ((t7 & one) << 25);
1082 r->n[7] = (t7 >> 1) + ((t8 & one) << 25);
1083 r->n[8] = (t8 >> 1) + ((t9 & one) << 25);
1084 r->n[9] = (t9 >> 1);
1085
1086 /* t0..t8: shifted right and added <= C/4 + 1/2
1087 * t9: shifted right
1088 *
1089 * Current bounds: t0..t8 <= C * (m/2 + 1/2)
1090 * t9 <= D * (m/2 + 1/4)
1091 *
1092 * Therefore the output magnitude (M) has to be set such that:
1093 * t0..t8: C * M >= C * (m/2 + 1/2)
1094 * t9: D * M >= D * (m/2 + 1/4)
1095 *
1096 * It suffices for all limbs that, for any input magnitude m:
1097 * M >= m/2 + 1/2
1098 *
1099 * and since we want the smallest such integer value for M:
1100 * M == floor(m/2) + 1
1101 */
1102}
1103
/* Body of the conditional move for the 8-word storage form
 * (secp256k1_fe_storage_cmov(r, a, flag) according to the symbol index of this listing;
 * NOTE(review): the signature line itself is not present in this listing - confirm).
 *
 * flag == 0: mask0 is all ones and mask1 is zero, so r->n[0..7] are left unchanged.
 * flag == 1: mask0 is zero and mask1 is all ones, so r->n[0..7] become a->n[0..7].
 * Any other flag value would give mixed masks, so callers presumably pass only 0 or 1.
 * The selection is done purely with AND/OR masks; there is no branch on flag.
 */
1105 uint32_t mask0, mask1;
/* flag is read through a volatile object; presumably to discourage the compiler from
 * converting the masking below into a branch - TODO confirm intent. */
1106 volatile int vflag = flag;
/* NOTE(review): macro comes from checkmem.h; presumably verifies that r->n holds
 * defined memory in VERIFY builds - confirm. */
1107 SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n));
/* vflag + 0xFFFFFFFF equals vflag - 1 modulo 2^32. */
1108 mask0 = vflag + ~((uint32_t)0);
1109 mask1 = ~mask0;
/* Per word: keep r where mask0 is set, take a where mask1 is set. */
1110 r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
1111 r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
1112 r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
1113 r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
1114 r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
1115 r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
1116 r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
1117 r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
1118}
1119
/* Body of the conversion from the 10-limb field element to the 8-word storage form
 * (secp256k1_fe_impl_to_storage(r, a) according to the symbol index of this listing;
 * NOTE(review): the signature line is not present here - confirm).
 *
 * Packs ten base-2^26 limbs a->n[0..9] into eight 32-bit words r->n[0..7], least
 * significant first. Each output word ORs the leftover high bits of one limb with the
 * low bits of the next limb(s) shifted into place; bits shifted beyond 32 are dropped
 * and picked up by the following word.
 * NOTE(review): no masking is applied, so this presumably relies on every limb being
 * below 2^26 (and a->n[9] below 2^22); otherwise the ORed fields would overlap - confirm
 * against callers.
 */
1121 r->n[0] = a->n[0] | a->n[1] << 26;
1122 r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
1123 r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
1124 r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
/* Word 4 spans three limbs: top 2 bits of limb 4, all of limb 5, low 4 bits of limb 6. */
1125 r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
1126 r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
1127 r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
1128 r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
1129}
1130
/* Body of the conversion from the 8-word storage form to the 10-limb field element
 * (secp256k1_fe_impl_from_storage(r, a) according to the symbol index of this listing;
 * NOTE(review): the signature line is not present here - confirm).
 *
 * Splits eight 32-bit words a->n[0..7] into ten base-2^26 limbs r->n[0..9], least
 * significant first. For each limb, the remaining high bits of one word are combined
 * with the low bits of the next word, and the shifted-in part is masked with 0x3FFFFFF
 * so the limb stays within 26 bits.
 */
1132 r->n[0] = a->n[0] & 0x3FFFFFFUL;
1133 r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
1134 r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
1135 r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
1136 r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
/* Limb 5 lies entirely inside word 4 (bits 2..27). */
1137 r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
1138 r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
1139 r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
1140 r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
/* Top limb: the remaining 22 bits of the last word. */
1141 r->n[9] = a->n[7] >> 10;
1142}
1143
/* Body of the conversion from the nine-limb base-2^30 form to the ten-limb base-2^26
 * field element (secp256k1_fe_from_signed30(r, a) according to the symbol index of this
 * listing; NOTE(review): the signature line is not present here - confirm).
 *
 * Reads a->v[0..8] into unsigned 32-bit locals, checks in VERIFY builds that each limb
 * fits its expected width, and regroups the bits into r->n[0..9]. Every output limb
 * except the last is masked to 26 bits.
 */
/* M26 == 0x3FFFFFF, a mask of 26 one bits. */
1145 const uint32_t M26 = UINT32_MAX >> 6;
1146 const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4],
1147 a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8];
1148
1149 /* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and
1150 * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8).
1151 */
1152 VERIFY_CHECK(a0 >> 30 == 0);
1153 VERIFY_CHECK(a1 >> 30 == 0);
1154 VERIFY_CHECK(a2 >> 30 == 0);
1155 VERIFY_CHECK(a3 >> 30 == 0);
1156 VERIFY_CHECK(a4 >> 30 == 0);
1157 VERIFY_CHECK(a5 >> 30 == 0);
1158 VERIFY_CHECK(a6 >> 30 == 0);
1159 VERIFY_CHECK(a7 >> 30 == 0);
1160 VERIFY_CHECK(a8 >> 16 == 0);
1161
/* Regroup 30-bit limbs into 26-bit limbs: take what is left of one input limb and fill
 * up with the low bits of the next one. */
1162 r->n[0] = a0 & M26;
1163 r->n[1] = (a0 >> 26 | a1 << 4) & M26;
1164 r->n[2] = (a1 >> 22 | a2 << 8) & M26;
1165 r->n[3] = (a2 >> 18 | a3 << 12) & M26;
1166 r->n[4] = (a3 >> 14 | a4 << 16) & M26;
1167 r->n[5] = (a4 >> 10 | a5 << 20) & M26;
1168 r->n[6] = (a5 >> 6 | a6 << 24) & M26;
/* Limb 7 lies entirely inside a6 (bits 2..27). */
1169 r->n[7] = (a6 >> 2 ) & M26;
1170 r->n[8] = (a6 >> 28 | a7 << 2) & M26;
/* Top limb: 6 bits left of a7 plus the (at most 16-bit) a8; no mask is applied here. */
1171 r->n[9] = (a7 >> 24 | a8 << 6);
1172}
1173
/* Body of the conversion from the ten-limb base-2^26 field element to the nine-limb
 * base-2^30 form (secp256k1_fe_to_signed30(r, a) according to the symbol index of this
 * listing; NOTE(review): the signature line is not present here - confirm).
 *
 * Regroups the bits of a->n[0..9] into r->v[0..8]; every output limb except the last is
 * masked to 30 bits.
 * NOTE(review): input limbs are not masked, so this presumably expects each a->n[i] to be
 * below 2^26 (a->n[9] below 2^22) - confirm against callers.
 */
/* M30 == 0x3FFFFFFF, a mask of 30 one bits. */
1175 const uint32_t M30 = UINT32_MAX >> 2;
/* Limbs are widened to 64 bits so the left shifts below cannot discard bits before masking. */
1176 const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4],
1177 a5 = a->n[5], a6 = a->n[6], a7 = a->n[7], a8 = a->n[8], a9 = a->n[9];
1178
1179 r->v[0] = (a0 | a1 << 26) & M30;
1180 r->v[1] = (a1 >> 4 | a2 << 22) & M30;
1181 r->v[2] = (a2 >> 8 | a3 << 18) & M30;
1182 r->v[3] = (a3 >> 12 | a4 << 14) & M30;
1183 r->v[4] = (a4 >> 16 | a5 << 10) & M30;
1184 r->v[5] = (a5 >> 20 | a6 << 6) & M30;
/* Output limb 6 spans three input limbs: top 2 bits of a6, all of a7, low 2 bits of a8. */
1185 r->v[6] = (a6 >> 24 | a7 << 2
1186 | a8 << 28) & M30;
1187 r->v[7] = (a8 >> 2 | a9 << 24) & M30;
/* Top limb: what remains of a9 above the 6 bits consumed by r->v[7]. */
1188 r->v[8] = a9 >> 6;
1189}
1190
/* Initializer of the modulus description used by the inversion / Jacobi routines
 * (secp256k1_const_modinfo_fe of type secp256k1_modinv32_modinfo according to the symbol
 * index of this listing; NOTE(review): the declaration line is not present here - confirm).
 *
 * First member: nine signed limbs in base 2^30, least significant first, encoding
 *   -0x3D1 + (-4) * 2^30 + 65536 * 2^240 = 2^256 - 2^32 - 977.
 * Second member: 0x2DDACACF is the multiplicative inverse of that value modulo 2^30
 *   (977 * 0x2DDACACF == 700 * 2^30 - 1, and the modulus is congruent to -977 mod 2^30).
 */
1192 {{-0x3D1, -4, 0, 0, 0, 0, 0, 0, 65536}},
1193 0x2DDACACFL
1194};
1195
1205
1215
/* Body of the variable-time squareness test (secp256k1_fe_impl_is_square_var(x) according
 * to the symbol index of this listing; NOTE(review): the signature line is not present
 * here - confirm).
 *
 * Returns 1 when the copied input is zero. Otherwise the result is derived from a Jacobi
 * symbol value jac: nonzero jac gives (jac >= 0); jac == 0 means the Jacobi computation
 * did not produce an answer, and the result of secp256k1_fe_sqrt on the copy is returned
 * instead.
 *
 * NOTE(review): this listing skips several numbered lines. The declaration of `s` and the
 * assignment to `jac` are not visible; presumably jac comes from
 * secp256k1_jacobi32_maybe_var(&s, ...) as the comments below suggest - confirm against
 * the real source file.
 */
1217 secp256k1_fe tmp;
1219 int jac, ret;
1220
/* Work on a local copy so the const input is not modified.
 * NOTE(review): a listing line between this copy and the zero check is missing;
 * presumably it normalizes tmp - confirm. */
1221 tmp = *x;
1223 /* secp256k1_jacobi32_maybe_var cannot deal with input 0. */
1224 if (secp256k1_fe_is_zero(&tmp)) return 1;
/* Convert to the base-2^30 limb form consumed by the Jacobi routine. */
1225 secp256k1_fe_to_signed30(&s, &tmp);
1227 if (jac == 0) {
1228 /* secp256k1_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back
1229 * to computing a square root. This should be extremely rare with random
1230 * input (except in VERIFY mode, where a lower iteration count is used). */
/* The computed root itself is discarded; only the return value of secp256k1_fe_sqrt is used. */
1231 secp256k1_fe dummy;
1232 ret = secp256k1_fe_sqrt(&dummy, &tmp);
1233 } else {
1234 ret = jac >= 0;
1235 }
1236 return ret;
1237}
1238
1239#endif /* SECP256K1_FIELD_REPR_IMPL_H */
int ret
#define SECP256K1_CHECKMEM_CHECK_VERIFY(p, len)
Definition checkmem.h:99
unsigned char u8
static int secp256k1_fe_sqrt(secp256k1_fe *SECP256K1_RESTRICT r, const secp256k1_fe *SECP256K1_RESTRICT a)
Compute a square root of a field element.
#define secp256k1_fe_normalize_var
Definition field.h:80
#define secp256k1_fe_is_zero
Definition field.h:85
#define secp256k1_fe_normalize
Definition field.h:78
static SECP256K1_INLINE void secp256k1_fe_impl_half(secp256k1_fe *r)
static void secp256k1_fe_impl_set_b32_mod(secp256k1_fe *r, const unsigned char *a)
static void secp256k1_fe_impl_normalize_weak(secp256k1_fe *r)
static int secp256k1_fe_impl_is_square_var(const secp256k1_fe *x)
static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a)
Convert a field element to a 32-byte big endian value.
static SECP256K1_INLINE void secp256k1_fe_impl_add(secp256k1_fe *r, const secp256k1_fe *a)
static SECP256K1_INLINE void secp256k1_fe_impl_clear(secp256k1_fe *a)
static SECP256K1_INLINE void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a)
static SECP256K1_INLINE void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a)
static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_fe
static SECP256K1_INLINE int secp256k1_fe_impl_is_zero(const secp256k1_fe *a)
static void secp256k1_fe_impl_get_bounds(secp256k1_fe *r, int m)
static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char *a)
static SECP256K1_INLINE void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t *SECP256K1_RESTRICT b)
static void secp256k1_fe_from_signed30(secp256k1_fe *r, const secp256k1_modinv32_signed30 *a)
static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag)
static SECP256K1_INLINE void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m)
static SECP256K1_INLINE void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a)
static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b)
#define VERIFY_BITS(x, n)
static int secp256k1_fe_impl_normalizes_to_zero(const secp256k1_fe *r)
static void secp256k1_fe_impl_inv_var(secp256k1_fe *r, const secp256k1_fe *x)
static SECP256K1_INLINE void secp256k1_fe_impl_sqr(secp256k1_fe *r, const secp256k1_fe *a)
static SECP256K1_INLINE void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a)
static void secp256k1_fe_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_fe *a)
static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a)
static SECP256K1_INLINE void secp256k1_fe_impl_add_int(secp256k1_fe *r, int a)
static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r)
static void secp256k1_fe_impl_normalize(secp256k1_fe *r)
static SECP256K1_INLINE void secp256k1_fe_impl_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag)
static void secp256k1_fe_impl_inv(secp256k1_fe *r, const secp256k1_fe *x)
static void secp256k1_fe_impl_normalize_var(secp256k1_fe *r)
static SECP256K1_INLINE int secp256k1_fe_impl_is_odd(const secp256k1_fe *a)
static SECP256K1_INLINE void secp256k1_fe_impl_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe *SECP256K1_RESTRICT b)
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
#define SECP256K1_INLINE
Definition util.h:48
#define VERIFY_CHECK(cond)
Definition util.h:153
#define SECP256K1_RESTRICT
Definition util.h:188
This field implementation represents the value as 10 uint32_t limbs in base 2^26.
Definition field_10x26.h:14
uint32_t n[10]
Definition field_10x26.h:22