chia_functions.hpp
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2018-2020 Mikhail Komarov <nemo@nil.foundation>
3 //
4 // MIT License
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
7 // of this software and associated documentation files (the "Software"), to deal
8 // in the Software without restriction, including without limitation the rights
9 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 // copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
13 // The above copyright notice and this permission notice shall be included in all
14 // copies or substantial portions of the Software.
15 //
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 // SOFTWARE.
23 //---------------------------------------------------------------------------//
24 
25 #ifndef CRYPTO3_VDF_CHIA_FUNCTIONS_HPP
26 #define CRYPTO3_VDF_CHIA_FUNCTIONS_HPP
27 
28 #include <nil/crypto3/vdf/detail/chia_policy.hpp>
29 
30 namespace nil {
31  namespace crypto3 {
32  namespace vdf {
33  namespace detail {
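 // Class-group arithmetic for the Chia VDF: normalization, reduction and NUDUPL
 // squaring of binary quadratic forms (a, b, c), with separate code paths for the
 // GMP/MPIR, FLINT and Boost.Multiprecision backends.
 //
 // Typical use per VDF iteration (sketch; `st` is a hypothetical, fully
 // initialized state_type<T> holding a reduced form):
 //     chia_functions::nudupl(st);       // square the form in the class group
 //     chia_functions::fast_reduce(st);  // bring the result back to reduced form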
34  struct chia_functions : public chia_policy {
35  typedef chia_policy policy_type;
36 
37  template<typename T>
38  using state_type = typename policy_type::template state_type<T>;
39 
40 #if defined(CRYPTO3_VDF_GMP) || defined(CRYPTO3_VDF_MPIR)
41 
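 // Normalization: ensures -a < b <= a for the positive definite form (a, b, c).
 // With r = floor((a - b) / 2a) the update is c += r^2*a + r*b, then b += 2ra;
 // the two early returns handle r == 0 (already normalized) and r == 1
 // (-3a < b <= -a) without performing the division.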
47  template<typename T>
48  inline static void normalize(state_type<T> &state) {
49  bool bleqa = (mpz_cmp(state.form.b, state.form.a) <= 0);
50  mpz_neg(state.form.a, state.form.a);
51  if (mpz_cmp(state.form.b, state.form.a) > 0 && bleqa) {
52  // Already normalized
53  return;
54  }
55  mpz_neg(state.form.a, state.form.a);
56  mpz_sub(state.r, state.form.a, state.form.b);
57 
58  mpz_mul_si(state.ra, state.form.a, -3);
59  bool falb = (mpz_cmp(state.ra, state.form.b) < 0);
60 
61  mpz_mul_2exp(state.ra, state.form.a, 1);
62 
63  if (mpz_cmp(state.r, state.ra) >= 0 && falb) {
64  mpz_add(state.form.c, state.form.c, state.form.a);
65  mpz_add(state.form.c, state.form.c, state.form.b);
66  mpz_add(state.form.b, state.form.b, state.ra);
67 
68  return;
69  }
70 
71  mpz_fdiv_q(state.r, state.r, state.ra);
72  mpz_mul(state.ra, state.r, state.form.a);
73  mpz_addmul(state.form.c, state.ra, state.r);
74  mpz_addmul(state.form.c, state.r, state.form.b);
75  mpz_mul_2exp(state.ra, state.ra, 1);
76  mpz_add(state.form.b, state.form.b, state.ra);
77  }
78 
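 // Returns true when |b| <= a and |b| <= c, i.e. no further reduction step is
 // needed; in that case it also finishes the job by swapping a and c (negating b)
 // when a > c, and by making b non-negative when a == c. Returns false otherwise.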
86  template<typename T>
87  inline static bool test_reduction(binary_quadratic_form<T> &f) {
88  int a_b = mpz_cmpabs(f.a, f.b);
89  int c_b = mpz_cmpabs(f.c, f.b);
90 
91  if (a_b < 0 || c_b < 0) {
92  return false;
93  }
94 
95  int a_c = mpz_cmp(f.a, f.c);
96 
97  if (a_c > 0) {
98  mpz_swap(f.a, f.c);
99  mpz_neg(f.b, f.b);
100  }
101 
102  if (a_c == 0 && mpz_sgn(f.b) < 0) {
103  mpz_neg(f.b, f.b);
104  }
105 
106  return true;
107  }
108 
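 // Reduction loop on 64-bit approximations: a, b, c are truncated to their top
 // bits (mpz_get_si_2exp) and a 2x2 transformation (u, v; w, x) is accumulated in
 // machine words while its entries stay below `threshold`; only then is it applied
 // to the full-precision form. If the exponents of a, b, c differ by more than
 // `exp_threshold`, the approximation is unreliable and a plain normalize() step
 // is taken instead.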
109  template<typename T>
110  inline static void fast_reduce(state_type<T> &state) {
111 
112  int64_t u, v, w, x, u_, v_, w_, x_;
113  int64_t delta, gamma, sgn;
114  int64_t a, b, c, a_, b_, c_;
115  int64_t aa, ab, ac, ba, bb, bc, ca, cb, cc;
116  signed long int a_exp, b_exp, c_exp, max_exp, min_exp;
117 
118  while (!test_reduction(state.form)) {
119 
120  a = mpz_get_si_2exp(&a_exp, state.form.a);
121  b = mpz_get_si_2exp(&b_exp, state.form.b);
122  c = mpz_get_si_2exp(&c_exp, state.form.c);
123 
124  max_exp = a_exp;
125  min_exp = a_exp;
126 
127  if (max_exp < b_exp) {
128  max_exp = b_exp;
129  }
130  if (min_exp > b_exp) {
131  min_exp = b_exp;
132  }
133 
134  if (max_exp < c_exp) {
135  max_exp = c_exp;
136  }
137  if (min_exp > c_exp) {
138  min_exp = c_exp;
139  }
140 
141  if (max_exp - min_exp > exp_threshold) {
142  normalize(state);
143  continue;
144  }
145  max_exp++; // for safety vs overflow
146 
147  // Ensure a, b, c are shifted so that a : b : c ratios are same as
148  // state.form.a : state.form.b : state.form.c
149  // a, b, c will be used as approximations to state.form.a, state.form.b, state.form.c
150  a >>= (max_exp - a_exp);
151  b >>= (max_exp - b_exp);
152  c >>= (max_exp - c_exp);
153 
154  u_ = 1;
155  v_ = 0;
156  w_ = 0;
157  x_ = 1;
158 
159  // We must be very careful about overflow in the following steps
160  do {
161  u = u_;
162  v = v_;
163  w = w_;
164  x = x_;
165  // Ensure that delta = floor ((b+c) / 2c)
166  delta = b >= 0 ? (b + c) / (c << 1) : -(-b + c) / (c << 1);
167  a_ = c;
168  c_ = c * delta;
169  b_ = -b + (c_ << 1);
170  gamma = b - c_;
171  c_ = a - delta * gamma;
172 
173  a = a_;
174  b = b_;
175  c = c_;
176 
177  u_ = v;
178  v_ = -u + delta * v;
179  w_ = x;
180  x_ = -w + delta * x;
181  // The condition (abs(v_) | abs(x_)) <= THRESH protects against overflow
182  } while ((abs(v_) | abs(x_)) <= threshold && a > c && c > 0);
183 
184  if ((abs(v_) | abs(x_)) <= threshold) {
185  u = u_;
186  v = v_;
187  w = w_;
188  x = x_;
189  }
190 
191  aa = u * u;
192  ab = u * w;
193  ac = w * w;
194  ba = u * v << 1;
195  bb = u * x + v * w;
196  bc = w * x << 1;
197  ca = v * v;
198  cb = v * x;
199  cc = x * x;
200 
201  // The following operations take 40% of the overall runtime.
202 
203  mpz_mul_si(state.faa, state.form.a, aa);
204  mpz_mul_si(state.fab, state.form.b, ab);
205  mpz_mul_si(state.fac, state.form.c, ac);
206 
207  mpz_mul_si(state.fba, state.form.a, ba);
208  mpz_mul_si(state.fbb, state.form.b, bb);
209  mpz_mul_si(state.fbc, state.form.c, bc);
210 
211  mpz_mul_si(state.fca, state.form.a, ca);
212  mpz_mul_si(state.fcb, state.form.b, cb);
213  mpz_mul_si(state.fcc, state.form.c, cc);
214 
215  mpz_add(state.form.a, state.faa, state.fab);
216  mpz_add(state.form.a, state.form.a, state.fac);
217 
218  mpz_add(state.form.b, state.fba, state.fbb);
219  mpz_add(state.form.b, state.form.b, state.fbc);
220 
221  mpz_add(state.form.c, state.fca, state.fcb);
222  mpz_add(state.form.c, state.form.c, state.fcc);
223  }
224  }
225 
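 // Bit length of x (index of the most significant set bit plus one); 0 for x == 0.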
226  template<typename I>
227  inline static long mpz_bits(I x) {
228  if (x->_mp_size == 0) {
229  return 0;
230  }
231  return mpz_sizeinbase(x, 2);
232  }
233 
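 // r += x * u for a signed multiplier u, built on GMP's unsigned addmul/submul.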
234  inline static void mpz_addmul_si(mpz_t r, mpz_t x, long u) {
235  if (u >= 0) {
236  mpz_addmul_ui(r, x, u);
237  } else {
238  mpz_submul_ui(r, x, -u);
239  }
240  }
241 
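 // Shift op left by `shift` bits when shift > 0, right by -shift bits otherwise;
 // shifts of -64 or beyond return 0.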
242  inline static uint64_t signed_shift(uint64_t op, int shift) {
243  if (shift > 0) {
244  return op << shift;
245  }
246  if (shift <= -64) {
247  return 0;
248  }
249  return op >> (-shift);
250  }
251 
252  // Return an int64_t approximation x of the large mpz_t op together with an exponent adjustment e.
253  // The ratio (x * 2^e) / op is approximately the same constant for every call, which is all the callers need.
254  inline static int64_t mpz_get_si_2exp(signed long int *exp, const mpz_t op) {
255  uint64_t size = mpz_size(op);
256  uint64_t last = mpz_getlimbn(op, size - 1);
257  uint64_t ret;
258  int lg2 = LOG2(last) + 1;
259  *exp = lg2;
260  ret = signed_shift(last, 63 - *exp);
261  if (size > 1) {
262  *exp += (size - 1) * 64;
263  uint64_t prev = mpz_getlimbn(op, size - 2);
264  ret += signed_shift(prev, -1 - lg2);
265  }
266  if (mpz_sgn(op) < 0) {
267  return -((int64_t)ret);
268  }
269  return ret;
270  }
271 
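 // Lehmer-style partial extended GCD on (state.by, state.bx): the inner loop runs
 // on single-limb approximations of the top bits, the cofactors are maintained in
 // state.y and state.x, and the outer loop stops once bx is reduced to at most the
 // bound state.L. Used by nudupl() to keep intermediate operands small.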
272  template<typename T>
273  static void mpz_xgcd_partial(state_type<T> &state) {
274  mp_limb_signed_t aa2, aa1, bb2, bb1, rr1, rr2, qq, bb, t1, t2, t3, i;
275  mp_limb_signed_t bits, bits1, bits2;
276 
277  mpz_set_si(state.y, 0);
278  mpz_set_si(state.x, -1);
279 
280  while (*state.bx->_mp_d != 0 && mpz_cmp(state.bx, state.L) > 0) {
281  bits2 = mpz_bits(state.by);
282  bits1 = mpz_bits(state.bx);
283  bits = __GMP_MAX(bits2, bits1) - GMP_LIMB_BITS + 1;
284  if (bits < 0) {
285  bits = 0;
286  }
287 
288  mpz_tdiv_q_2exp(state.r, state.by, bits);
289  rr2 = mpz_get_ui(state.r);
290  mpz_tdiv_q_2exp(state.r, state.bx, bits);
291  rr1 = mpz_get_ui(state.r);
292  mpz_tdiv_q_2exp(state.r, state.L, bits);
293  bb = mpz_get_ui(state.r);
294 
295  aa2 = 0;
296  aa1 = 1;
297  bb2 = 1;
298  bb1 = 0;
299 
300 
301  for (i = 0; rr1 != 0 && rr1 > bb; i++) {
302  qq = rr2 / rr1;
303 
304  t1 = rr2 - qq * rr1;
305  t2 = aa2 - qq * aa1;
306  t3 = bb2 - qq * bb1;
307 
308  if (i & 1) {
309  if (t1 < -t3 || rr1 - t1 < t2 - aa1) {
310  break;
311  }
312  } else {
313  if (t1 < -t2 || rr1 - t1 < t3 - bb1) {
314  break;
315  }
316  }
317 
318  rr2 = rr1;
319  rr1 = t1;
320  aa2 = aa1;
321  aa1 = t2;
322  bb2 = bb1;
323  bb1 = t3;
324  }
325 
326  if (i == 0) {
327  mpz_fdiv_qr(state.ra, state.by, state.by, state.bx);
328  mpz_swap(state.by, state.bx);
329 
330  mpz_submul(state.y, state.x, state.ra);
331  mpz_swap(state.y, state.x);
332  } else {
333  mpz_mul_si(state.r, state.by, bb2);
334  if (aa2 >= 0) {
335  mpz_addmul_ui(state.r, state.bx, aa2);
336  } else {
337  mpz_submul_ui(state.r, state.bx, -aa2);
338  }
339  mpz_mul_si(state.bx, state.bx, aa1);
340  if (bb1 >= 0) {
341  mpz_addmul_ui(state.bx, state.by, bb1);
342  } else {
343  mpz_submul_ui(state.bx, state.by, -bb1);
344  }
345  mpz_set(state.by, state.r);
346 
347  mpz_mul_si(state.r, state.y, bb2);
348  if (aa2 >= 0) {
349  mpz_addmul_ui(state.r, state.x, aa2);
350  } else {
351  mpz_submul_ui(state.r, state.x, -aa2);
352  }
353  mpz_mul_si(state.x, state.x, aa1);
354  if (bb1 >= 0) {
355  mpz_addmul_ui(state.x, state.y, bb1);
356  } else {
357  mpz_submul_ui(state.x, state.y, -bb1);
358  }
359  mpz_set(state.y, state.r);
360 
361  if (mpz_sgn(state.bx) < 0) {
362  mpz_neg(state.x, state.x);
363  mpz_neg(state.bx, state.bx);
364  }
365  if (mpz_sgn(state.by) < 0) {
366  mpz_neg(state.y, state.y);
367  mpz_neg(state.by, state.by);
368  }
369  }
370  }
371 
372  if (mpz_sgn(state.by) < 0) {
373  mpz_neg(state.y, state.y);
374  mpz_neg(state.x, state.x);
375  mpz_neg(state.by, state.by);
376  }
377  }
378 
379  // https://www.researchgate.net/publication/221451638_Computational_aspects_of_NUCOMP
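 // NUDUPL: squares the form in the class group (the squaring special case of
 // NUCOMP). The fast path (|by| <= L) skips the partial extended GCD; otherwise
 // mpz_xgcd_partial() first shrinks bx and by so the remaining multiplications
 // stay on operands of roughly half the bit length of the discriminant.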
380  template<typename T>
381  static void nudupl(state_type<T> &state) {
382 
383  mpz_gcdext(state.G, state.y, NULL, state.form.b, state.form.a);
384 
385 #if defined(CRYPTO3_VDF_GMP)
386 
387  mpz_divexact(state.By, state.form.a, state.G);
388  mpz_divexact(state.Dy, state.form.b, state.G);
389 
390 #elif defined(CRYPTO3_VDF_MPIR)
391 
392  mpz_divexact_gcd(state.By, state.form.a, state.G);
393  mpz_divexact_gcd(state.Dy, state.form.b, state.G);
394 
395 #endif
396 
397  mpz_mul(state.bx, state.y, state.form.c);
398  mpz_mod(state.bx, state.bx, state.By);
399 
400  mpz_set(state.by, state.By);
401 
402  if (mpz_cmpabs(state.by, state.L) <= 0) {
403  mpz_mul(state.dx, state.bx, state.Dy);
404  mpz_sub(state.dx, state.dx, state.form.c);
405  mpz_divexact(state.dx, state.dx, state.By);
406  mpz_mul(state.form.a, state.by, state.by);
407  mpz_mul(state.form.c, state.bx, state.bx);
408  mpz_add(state.t, state.bx, state.by);
409  mpz_mul(state.t, state.t, state.t);
410  mpz_sub(state.form.b, state.form.b, state.t);
411  mpz_add(state.form.b, state.form.b, state.form.a);
412  mpz_add(state.form.b, state.form.b, state.form.c);
413  mpz_mul(state.t, state.G, state.dx);
414  mpz_sub(state.form.c, state.form.c, state.t);
415  return;
416  }
417 
418  mpz_xgcd_partial(state);
419 
420  mpz_neg(state.x, state.x);
421  if (mpz_sgn((state.x)) > 0) {
422  mpz_neg(state.y, state.y);
423  } else {
424  mpz_neg(state.by, state.by);
425  }
426 
427  mpz_mul(state.ax, state.G, state.x);
428  mpz_mul(state.ay, state.G, state.y);
429 
430  mpz_mul(state.t, state.Dy, state.bx);
431  mpz_submul(state.t, state.form.c, state.x);
432  mpz_divexact(state.dx, state.t, state.By);
433  mpz_mul(state.Q1, state.y, state.dx);
434  mpz_add(state.dy, state.Q1, state.Dy);
435  mpz_add(state.form.b, state.dy, state.Q1);
436  mpz_mul(state.form.b, state.form.b, state.G);
437  mpz_divexact(state.dy, state.dy, state.x);
438  mpz_mul(state.form.a, state.by, state.by);
439  mpz_mul(state.form.c, state.bx, state.bx);
440  mpz_add(state.t, state.bx, state.by);
441  mpz_submul(state.form.b, state.t, state.t);
442  mpz_add(state.form.b, state.form.b, state.form.a);
443  mpz_add(state.form.b, state.form.b, state.form.c);
444  mpz_submul(state.form.a, state.ay, state.dy);
445  mpz_submul(state.form.c, state.ax, state.dx);
446  }
447 
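 // Builds the class-group generator used by the VDF for discriminant d:
 // the form (a, b, c) = (2, 1, (1 - d) / 8), i.e. c = (b^2 - d) / (4a), then reduces it.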
448  template<typename T>
449  static inline void discriminant_generator(state_type<T> &state, const T &d) {
450  T denom;
451  mpz_init(denom);
452  mpz_set_ui(state.form.a, 2);
453  mpz_set_ui(state.form.b, 1);
454  mpz_set_ui(state.form.b, 1);
455  mpz_mul(state.form.c, state.form.b, state.form.b);
456  mpz_sub(state.form.c, state.form.c, d);
457  mpz_mul_ui(denom, state.form.a, 4);
458  mpz_fdiv_q(state.form.c, state.form.c, denom);
459  fast_reduce(state);
460  mpz_clear(denom);
461  }
462 
463 #elif defined(CRYPTO3_VDF_FLINT)
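 // FLINT backend: the same algorithms as the GMP/MPIR branch above, written with
 // the fmpz_* API; FLINT's fmpz_xgcd_partial() takes the place of the hand-rolled
 // mpz_xgcd_partial() used there.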
464 
470  template<typename T>
471  inline static void normalize(state_type<T> &state) {
472  fmpz_neg(state.r, state.form.a);
473  if (fmpz_cmp(state.form.b, state.r) > 0 && fmpz_cmp(state.form.b, state.form.a) <= 0) {
474  // Already normalized
475  return;
476  }
477  fmpz_sub(state.r, state.form.a, state.form.b);
478  fmpz_mul_2exp(state.ra, state.form.a, 1);
479  fmpz_fdiv_q(state.r, state.r, state.ra);
480  fmpz_mul(state.ra, state.r, state.form.a);
481  fmpz_addmul(state.form.c, state.ra, state.r);
482  fmpz_addmul(state.form.c, state.r, state.form.b);
483  fmpz_mul_2exp(state.ra, state.ra, 1);
484  fmpz_add(state.form.b, state.form.b, state.ra);
485  }
486 
494  template<typename T>
495  inline static bool test_reduction(binary_quadratic_form<T> &f) {
496  int a_b = fmpz_cmpabs(f.a, f.b);
497  int c_b = fmpz_cmpabs(f.c, f.b);
498 
499  if (a_b < 0 || c_b < 0) {
500  return false;
501  }
502 
503  int a_c = fmpz_cmp(f.a, f.c);
504 
505  if (a_c > 0) {
506  fmpz_swap(f.a, f.c);
507  fmpz_neg(f.b, f.b);
508  }
509 
510  if (a_c == 0 && fmpz_sgn(f.b) < 0) {
511  fmpz_neg(f.b, f.b);
512  }
513 
514  return true;
515  }
516 
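 // Full reduction by repeated reduction steps: with s = floor((c + b) / 2c) each
 // pass maps (a, b, c) to (c, -b + 2sc, cs^2 - bs + a), until a < c, or a == c
 // with b >= 0; the form is normalized before and after the loop.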
522  template<typename T>
523  inline static void reduce(state_type<T> &state) {
524  normalize(state);
525  int cmp;
526  while (((cmp = fmpz_cmp(state.form.a, state.form.c)) > 0) ||
527  (cmp == 0 && fmpz_sgn(state.form.b) < 0)) {
528  fmpz_add(state.s, state.form.c, state.form.b);
529 
530  // x = 2c
531  fmpz_mul_2exp(state.p, state.form.c, 1);
532  fmpz_fdiv_q(state.s, state.s, state.p);
533 
534  fmpz_set(state.previous_form.a, state.form.a);
535  fmpz_set(state.previous_form.b, state.form.b);
536 
537  // b = -b
538  fmpz_set(state.form.a, state.form.c);
539  fmpz_neg(state.form.b, state.form.b);
540 
541  // x = 2sc
542  fmpz_mul(state.p, state.s, state.form.c);
543  fmpz_mul_2exp(state.p, state.p, 1);
544 
545  // b += 2sc
546  fmpz_add(state.form.b, state.form.b, state.p);
547 
548  // x = bs
549  fmpz_mul(state.p, state.previous_form.b, state.s);
550 
551  // s = s^2
552  fmpz_mul(state.s, state.s, state.s);
553 
554  // c = cs
555  fmpz_mul(state.form.c, state.form.c, state.s);
556 
557  // c -= cx
558  fmpz_sub(state.form.c, state.form.c, state.p);
559 
560  // c += a
561  fmpz_add(state.form.c, state.form.c, state.previous_form.a);
562  }
563  normalize(state);
564  }
565 
566  template<typename T>
567  inline static void fast_reduce(state_type<T> &state) {
568 
569  int64_t u, v, w, x, u_, v_, w_, x_;
570  int64_t delta, gamma, sgn;
571  int64_t a, b, c, a_, b_, c_;
572  int64_t aa, ab, ac, ba, bb, bc, ca, cb, cc;
573  signed long int a_exp, b_exp, c_exp, max_exp, min_exp;
574 
575  while (!test_reduction(state.form)) {
576 
577  a = fmpz_get_si_2exp(&a_exp, state.form.a);
578  b = fmpz_get_si_2exp(&b_exp, state.form.b);
579  c = fmpz_get_si_2exp(&c_exp, state.form.c);
580 
581  max_exp = a_exp;
582  min_exp = a_exp;
583 
584  if (max_exp < b_exp) {
585  max_exp = b_exp;
586  }
587  if (min_exp > b_exp) {
588  min_exp = b_exp;
589  }
590 
591  if (max_exp < c_exp) {
592  max_exp = c_exp;
593  }
594  if (min_exp > c_exp) {
595  min_exp = c_exp;
596  }
597 
598  if (max_exp - min_exp > exp_threshold) {
599  normalize(state);
600  continue;
601  }
602  max_exp++; // for safety vs overflow
603 
604  // Ensure a, b, c are shifted so that a : b : c ratios are same as
605  // state.form.a : state.form.b : state.form.c
606  // a, b, c will be used as approximations to state.form.a, state.form.b, state.form.c
607  a >>= (max_exp - a_exp);
608  b >>= (max_exp - b_exp);
609  c >>= (max_exp - c_exp);
610 
611  u_ = 1;
612  v_ = 0;
613  w_ = 0;
614  x_ = 1;
615 
616  // We must be very careful about overflow in the following steps
617  do {
618  u = u_;
619  v = v_;
620  w = w_;
621  x = x_;
622  // Ensure that delta = floor ((b+c) / 2c)
623  delta = b >= 0 ? (b + c) / (c << 1) : -(-b + c) / (c << 1);
624  a_ = c;
625  c_ = c * delta;
626  b_ = -b + (c_ << 1);
627  gamma = b - c_;
628  c_ = a - delta * gamma;
629 
630  a = a_;
631  b = b_;
632  c = c_;
633 
634  u_ = v;
635  v_ = -u + delta * v;
636  w_ = x;
637  x_ = -w + delta * x;
638  // The condition (abs(v_) | abs(x_)) <= THRESH protects against overflow
639  } while ((abs(v_) | abs(x_)) <= threshold && a > c && c > 0);
640 
641  if ((abs(v_) | abs(x_)) <= threshold) {
642  u = u_;
643  v = v_;
644  w = w_;
645  x = x_;
646  }
647 
648  aa = u * u;
649  ab = u * w;
650  ac = w * w;
651  ba = u * v << 1;
652  bb = u * x + v * w;
653  bc = w * x << 1;
654  ca = v * v;
655  cb = v * x;
656  cc = x * x;
657 
658  // The following operations take 40% of the overall runtime.
659 
660  fmpz_mul_si(state.faa, state.form.a, aa);
661  fmpz_mul_si(state.fab, state.form.b, ab);
662  fmpz_mul_si(state.fac, state.form.c, ac);
663 
664  fmpz_mul_si(state.fba, state.form.a, ba);
665  fmpz_mul_si(state.fbb, state.form.b, bb);
666  fmpz_mul_si(state.fbc, state.form.c, bc);
667 
668  fmpz_mul_si(state.fca, state.form.a, ca);
669  fmpz_mul_si(state.fcb, state.form.b, cb);
670  fmpz_mul_si(state.fcc, state.form.c, cc);
671 
672  fmpz_add(state.form.a, state.faa, state.fab);
673  fmpz_add(state.form.a, state.form.a, state.fac);
674 
675  fmpz_add(state.form.b, state.fba, state.fbb);
676  fmpz_add(state.form.b, state.form.b, state.fbc);
677 
678  fmpz_add(state.form.c, state.fca, state.fcb);
679  fmpz_add(state.form.c, state.form.c, state.fcc);
680  }
681  }
682 
683  template<typename I>
684  inline static long fmpz_bits(I x) {
685  if (x->_mp_size == 0) {
686  return 0;
687  }
688  return fmpz_sizeinbase(x, 2);
689  }
690 
691  inline static void fmpz_addmul_si(fmpz_t r, fmpz_t x, long u) {
692  if (u >= 0) {
693  fmpz_addmul_ui(r, x, u);
694  } else {
695  fmpz_submul_ui(r, x, -u);
696  }
697  }
698 
699  inline static uint64_t signed_shift(uint64_t op, int shift) {
700  if (shift > 0) {
701  return op << shift;
702  }
703  if (shift <= -64) {
704  return 0;
705  }
706  return op >> (-shift);
707  }
708 
709  // Return an approximation x of the large fmpz_t op by an int64_t and the exponent e adjustment.
710  // We must have (x * 2^e) / op = constant approximately.
711  inline static int64_t fmpz_get_si_2exp(signed long int *exp, const fmpz_t op) {
712  uint64_t size = fmpz_size(op);
713  uint64_t last = op[size - 1];
714  uint64_t ret;
715  int lg2 = LOG2(last) + 1;
716  *exp = lg2;
717  ret = signed_shift(last, 63 - *exp);
718  if (size > 1) {
719  *exp += (size - 1) * 64;
720  uint64_t prev = op[size - 2];
721  ret += signed_shift(prev, -1 - lg2);
722  }
723  if (fmpz_sgn(op) < 0) {
724  return -((int64_t)ret);
725  }
726  return ret;
727  }
728 
729  /* Find g and x such that a*x == g (mod n). Optimized for the common case g == 1,
730  * in which x = a^(-1) (mod n). */
731  void fast_gcdinv(fmpz_t g, fmpz_t x, const fmpz_t a, const fmpz_t n) {
732  int ret = fmpz_invmod(x, a, n);
733  if (ret) {
734  fmpz_one(g);
735  return;
736  }
737 
738  fmpz_gcdinv(g, x, a, n);
739  }
740 
741  // https://www.researchgate.net/publication/221451638_Computational_aspects_of_NUCOMP
742  template<typename T>
743  static void nudupl(state_type<T> &state) {
744 
745  fmpz_xgcd(state.G, state.y, NULL, state.form.b, state.form.a);
746 
747  fmpz_divexact(state.By, state.form.a, state.G);
748  fmpz_divexact(state.Dy, state.form.b, state.G);
749 
750  fmpz_mul(state.bx, state.y, state.form.c);
751  fmpz_mod(state.bx, state.bx, state.By);
752 
753  fmpz_set(state.by, state.By);
754 
755  if (fmpz_cmpabs(state.by, state.L) <= 0) {
756  fmpz_mul(state.dx, state.bx, state.Dy);
757  fmpz_sub(state.dx, state.dx, state.form.c);
758  fmpz_divexact(state.dx, state.dx, state.By);
759  fmpz_mul(state.form.a, state.by, state.by);
760  fmpz_mul(state.form.c, state.bx, state.bx);
761  fmpz_add(state.t, state.bx, state.by);
762  fmpz_mul(state.t, state.t, state.t);
763  fmpz_sub(state.form.b, state.form.b, state.t);
764  fmpz_add(state.form.b, state.form.b, state.form.a);
765  fmpz_add(state.form.b, state.form.b, state.form.c);
766  fmpz_mul(state.t, state.G, state.dx);
767  fmpz_sub(state.form.c, state.form.c, state.t);
768  return;
769  }
770 
771  fmpz_xgcd_partial(state.y, state.x, state.by, state.bx, state.L);
772 
773  fmpz_neg(state.x, state.x);
774  if (fmpz_sgn((state.x)) > 0) {
775  fmpz_neg(state.y, state.y);
776  } else {
777  fmpz_neg(state.by, state.by);
778  }
779 
780  fmpz_mul(state.ax, state.G, state.x);
781  fmpz_mul(state.ay, state.G, state.y);
782 
783  fmpz_mul(state.t, state.Dy, state.bx);
784  fmpz_submul(state.t, state.form.c, state.x);
785  fmpz_divexact(state.dx, state.t, state.By);
786  fmpz_mul(state.Q1, state.y, state.dx);
787  fmpz_add(state.dy, state.Q1, state.Dy);
788  fmpz_add(state.form.b, state.dy, state.Q1);
789  fmpz_mul(state.form.b, state.form.b, state.G);
790  fmpz_divexact(state.dy, state.dy, state.x);
791  fmpz_mul(state.form.a, state.by, state.by);
792  fmpz_mul(state.form.c, state.bx, state.bx);
793  fmpz_add(state.t, state.bx, state.by);
794  fmpz_submul(state.form.b, state.t, state.t);
795  fmpz_add(state.form.b, state.form.b, state.form.a);
796  fmpz_add(state.form.b, state.form.b, state.form.c);
797  fmpz_submul(state.form.a, state.ay, state.dy);
798  fmpz_submul(state.form.c, state.ax, state.dx);
799  }
800 
801  template<typename T>
802  static inline void discriminant_generator(state_type<T> &state, const T &d) {
803  T denom;
804  fmpz_init(denom);
805  fmpz_set_ui(state.form.a, 2);
806  fmpz_set_ui(state.form.b, 1);
807  fmpz_mul(state.form.c, state.form.b, state.form.b);
808  fmpz_sub(state.form.c, state.form.c, d);
809  fmpz_mul_ui(denom, state.form.a, 4);
810  fmpz_fdiv_q(state.form.c, state.form.c, denom);
811  fast_reduce(state);
812  fmpz_clear(denom);
813  }
814 
815 #elif defined(CRYPTO3_VDF_BOOST)
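 // Boost.Multiprecision backend: the same algorithms expressed with number<>
 // operators. Note that several helpers in this branch still call mpz_* routines
 // directly on the number<> objects and therefore assume a GMP-backed number type.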
816 
817  template<typename T>
818  inline static void normalize(state_type<T> &state) {
819  bool bleqa = (state.form.b <= state.form.a);
820  state.form.a = -state.form.a;
821  if (state.form.b > state.form.a && bleqa) {
822  // Already normalized
823  return;
824  }
825  state.form.a = -state.form.a;
826  state.r = state.form.a - state.form.b;
827 
828  state.ra = state.form.a * -3;
829  bool falb = (state.ra < state.form.b);
830 
831  state.ra = state.form.a << 1;
832 
833  if (state.r >= state.ra && falb) {
834  state.form.c += state.form.a + state.form.b;
835  state.form.b += state.ra;
836 
837  return;
838  }
839 
840  state.r = state.r / state.ra;
841  state.ra = state.r * state.form.a;
842  state.form.c += state.ra * state.r;
843  state.form.c += state.r * state.form.b;
844  state.ra <<= 1;
845  state.form.b += state.ra;
846  }
847 
855  template<typename T>
856  inline static bool test_reduction(binary_quadratic_form<T> &f) {
857  int a_b = mpz_cmpabs(f.a, f.b);
858  int c_b = mpz_cmpabs(f.c, f.b);
859 
860  if (a_b < 0 || c_b < 0) {
861  return false;
862  }
863 
864  int a_c = mpz_cmp(f.a, f.c);
865 
866  if (a_c > 0) {
867  mpz_swap(f.a, f.c);
868  mpz_neg(f.b, f.b);
869  }
870 
871  if (a_c == 0 && mpz_sgn(f.b) < 0) {
872  mpz_neg(f.b, f.b);
873  }
874 
875  return true;
876  }
877 
878  template<typename T>
879  inline static void fast_reduce(state_type<T> &state) {
880 
881  int64_t u, v, w, x, u_, v_, w_, x_;
882  int64_t delta, gamma, sgn;
883  int64_t a, b, c, a_, b_, c_;
884  int64_t aa, ab, ac, ba, bb, bc, ca, cb, cc;
885  signed long int a_exp, b_exp, c_exp, max_exp, min_exp;
886 
887  while (!test_reduction(state.form)) {
888 
889  a = mpz_get_si_2exp(&a_exp, state.form.a);
890  b = mpz_get_si_2exp(&b_exp, state.form.b);
891  c = mpz_get_si_2exp(&c_exp, state.form.c);
892 
893  max_exp = a_exp;
894  min_exp = a_exp;
895 
896  if (max_exp < b_exp) {
897  max_exp = b_exp;
898  }
899  if (min_exp > b_exp) {
900  min_exp = b_exp;
901  }
902 
903  if (max_exp < c_exp) {
904  max_exp = c_exp;
905  }
906  if (min_exp > c_exp) {
907  min_exp = c_exp;
908  }
909 
910  if (max_exp - min_exp > exp_threshold) {
911  normalize(state);
912  continue;
913  }
914  max_exp++; // for safety vs overflow
915 
916  // Ensure a, b, c are shifted so that a : b : c ratios are same as
917  // state.form.a : state.form.b : state.form.c
918  // a, b, c will be used as approximations to state.form.a, state.form.b, state.form.c
919  a >>= (max_exp - a_exp);
920  b >>= (max_exp - b_exp);
921  c >>= (max_exp - c_exp);
922 
923  u_ = 1;
924  v_ = 0;
925  w_ = 0;
926  x_ = 1;
927 
928  // We must be very careful about overflow in the following steps
929  do {
930  u = u_;
931  v = v_;
932  w = w_;
933  x = x_;
934  // Ensure that delta = floor ((b+c) / 2c)
935  delta = b >= 0 ? (b + c) / (c << 1) : -(-b + c) / (c << 1);
936  a_ = c;
937  c_ = c * delta;
938  b_ = -b + (c_ << 1);
939  gamma = b - c_;
940  c_ = a - delta * gamma;
941 
942  a = a_;
943  b = b_;
944  c = c_;
945 
946  u_ = v;
947  v_ = -u + delta * v;
948  w_ = x;
949  x_ = -w + delta * x;
950  // The condition (abs(v_) | abs(x_)) <= THRESH protects against overflow
951  } while ((abs(v_) | abs(x_)) <= threshold && a > c && c > 0);
952 
953  if ((abs(v_) | abs(x_)) <= threshold) {
954  u = u_;
955  v = v_;
956  w = w_;
957  x = x_;
958  }
959 
960  aa = u * u;
961  ab = u * w;
962  ac = w * w;
963  ba = u * v << 1;
964  bb = u * x + v * w;
965  bc = w * x << 1;
966  ca = v * v;
967  cb = v * x;
968  cc = x * x;
969 
970  // The following operations take 40% of the overall runtime.
971 
972  state.faa = state.form.a * aa;
973  state.fab = state.form.b * ab;
974  state.fac = state.form.c * ac;
975 
976  state.fba = state.form.a * ba;
977  state.fbb = state.form.b * bb;
978  state.fbc = state.form.c * bc;
979 
980  state.fca = state.form.a * ca;
981  state.fcb = state.form.b * cb;
982  state.fcc = state.form.c * cc;
983 
984  state.form.a = state.faa + state.fab + state.fac;
985  state.form.b = state.fba + state.fbb + state.fbc;
986  state.form.c = state.fca + state.fcb + state.fcc;
987  }
988  }
989 
990  template<typename Backend, expression_template_option ExpressionTemplates>
991  inline static long mpz_bits(number<Backend, ExpressionTemplates> x) {
992  if (x->_mp_size == 0) {
993  return 0;
994  }
995  return mpz_sizeinbase(x, 2);
996  }
997 
998  inline static uint64_t signed_shift(uint64_t op, int shift) {
999  if (shift > 0) {
1000  return op << shift;
1001  }
1002  if (shift <= -64) {
1003  return 0;
1004  }
1005  return op >> (-shift);
1006  }
1007 
1008  // Return an approximation x of the large mpz_t op by an int64_t and the exponent e adjustment.
1009  // We must have (x * 2^e) / op = constant approximately.
1010  template<typename Backend, expression_template_option ExpressionTemplates>
1011  inline static int64_t mpz_get_si_2exp(signed long int *exp,
1012  const number<Backend, ExpressionTemplates> &op) {
1013  uint64_t size = mpz_size(op);
1014  uint64_t last = mpz_getlimbn(op, size - 1);
1015  uint64_t ret;
1016  int lg2 = LOG2(last) + 1;
1017  *exp = lg2;
1018  ret = signed_shift(last, 63 - *exp);
1019  if (size > 1) {
1020  *exp += (size - 1) * 64;
1021  uint64_t prev = mpz_getlimbn(op, size - 2);
1022  ret += signed_shift(prev, -1 - lg2);
1023  }
1024  if (mpz_sgn(op) < 0) {
1025  return -((int64_t)ret);
1026  }
1027  return ret;
1028  }
1029 
1030  template<typename T>
1031  static void mpz_xgcd_partial(state_type<T> &state) {
1032  mp_limb_signed_t aa2, aa1, bb2, bb1, rr1, rr2, qq, bb, t1, t2, t3, i;
1033  mp_limb_signed_t bits, bits1, bits2;
1034 
1035  mpz_set_si(state.y, 0);
1036  mpz_set_si(state.x, -1);
1037 
1038  while (*state.bx->_mp_d != 0 && mpz_cmp(state.bx, state.L) > 0) {
1039  bits2 = mpz_bits(state.by);
1040  bits1 = mpz_bits(state.bx);
1041  bits = __GMP_MAX(bits2, bits1) - GMP_LIMB_BITS + 1;
1042  if (bits < 0) {
1043  bits = 0;
1044  }
1045 
1046  mpz_tdiv_q_2exp(state.r, state.by, bits);
1047  rr2 = mpz_get_ui(state.r);
1048  mpz_tdiv_q_2exp(state.r, state.bx, bits);
1049  rr1 = mpz_get_ui(state.r);
1050  mpz_tdiv_q_2exp(state.r, state.L, bits);
1051  bb = mpz_get_ui(state.r);
1052 
1053  aa2 = 0;
1054  aa1 = 1;
1055  bb2 = 1;
1056  bb1 = 0;
1057 
1058 
1059  for (i = 0; rr1 != 0 && rr1 > bb; i++) {
1060  qq = rr2 / rr1;
1061 
1062  t1 = rr2 - qq * rr1;
1063  t2 = aa2 - qq * aa1;
1064  t3 = bb2 - qq * bb1;
1065 
1066  if (i & 1) {
1067  if (t1 < -t3 || rr1 - t1 < t2 - aa1) {
1068  break;
1069  }
1070  } else {
1071  if (t1 < -t2 || rr1 - t1 < t3 - bb1) {
1072  break;
1073  }
1074  }
1075 
1076  rr2 = rr1;
1077  rr1 = t1;
1078  aa2 = aa1;
1079  aa1 = t2;
1080  bb2 = bb1;
1081  bb1 = t3;
1082  }
1083 
1084  if (i == 0) {
1085  mpz_fdiv_qr(state.ra, state.by, state.by, state.bx);
1086  mpz_swap(state.by, state.bx);
1087 
1088  mpz_submul(state.y, state.x, state.ra);
1089  mpz_swap(state.y, state.x);
1090  } else {
1091  mpz_mul_si(state.r, state.by, bb2);
1092  if (aa2 >= 0) {
1093  mpz_addmul_ui(state.r, state.bx, aa2);
1094  } else {
1095  mpz_submul_ui(state.r, state.bx, -aa2);
1096  }
1097  mpz_mul_si(state.bx, state.bx, aa1);
1098  if (bb1 >= 0) {
1099  mpz_addmul_ui(state.bx, state.by, bb1);
1100  } else {
1101  mpz_submul_ui(state.bx, state.by, -bb1);
1102  }
1103  mpz_set(state.by, state.r);
1104 
1105  mpz_mul_si(state.r, state.y, bb2);
1106  if (aa2 >= 0) {
1107  mpz_addmul_ui(state.r, state.x, aa2);
1108  } else {
1109  mpz_submul_ui(state.r, state.x, -aa2);
1110  }
1111  mpz_mul_si(state.x, state.x, aa1);
1112  if (bb1 >= 0) {
1113  mpz_addmul_ui(state.x, state.y, bb1);
1114  } else {
1115  mpz_submul_ui(state.x, state.y, -bb1);
1116  }
1117  mpz_set(state.y, state.r);
1118 
1119  if (mpz_sgn(state.bx) < 0) {
1120  mpz_neg(state.x, state.x);
1121  mpz_neg(state.bx, state.bx);
1122  }
1123  if (mpz_sgn(state.by) < 0) {
1124  mpz_neg(state.y, state.y);
1125  mpz_neg(state.by, state.by);
1126  }
1127  }
1128  }
1129 
1130  if (mpz_sgn(state.by) < 0) {
1131  mpz_neg(state.y, state.y);
1132  mpz_neg(state.x, state.x);
1133  mpz_neg(state.by, state.by);
1134  }
1135  }
1136 
1137  // https://www.researchgate.net/publication/221451638_Computational_aspects_of_NUCOMP
1138  template<typename T>
1139  static void nudupl(state_type<T> &state) {
1140 
1141  mpz_gcdext(state.G, state.y, NULL, state.form.b, state.form.a);
1142 
1143  state.By = state.form.a / state.G;
1144  state.Dy = state.form.b / state.G;
1145 
1146  state.bx = (state.y * state.form.c) % state.By;
1147  state.by = state.By;
1148 
1149  if (state.by <= state.L) {
1150  state.dx = state.bx * state.Dy;
1151  state.dx = state.dx - state.form.c;
1152 
1153  state.dx = state.dx / state.By;
1154 
1155  state.form.a = state.by * state.by;
1156  state.form.c = state.bx * state.bx;
1157 
1158  state.t = state.bx + state.by;
1159  state.t *= state.t;
1160 
1161  state.form.b = state.form.b - state.t;
1162  state.form.b = state.form.b + state.form.a;
1163  state.form.b = state.form.b + state.form.c;
1164 
1165  state.t = state.G * state.dx;
1166  state.form.c = state.form.c - state.t;
1167  return;
1168  }
1169 
1170  mpz_xgcd_partial(state);
1171 
1172  state.x = -state.x;
1173  if (state.x > 0) {
1174  state.y = -state.y;
1175  } else {
1176  state.by = -state.by;
1177  }
1178 
1179  state.ax = state.G * state.x;
1180  state.ay = state.G * state.y;
1181 
1182  state.t = state.Dy * state.bx;
1183  state.t -= state.form.c * state.x;
1184 
1185  state.dx = state.t / state.By;
1186 
1187  state.Q1 = state.y * state.dx;
1188  state.dy = state.Q1 + state.Dy;
1189  state.form.b = state.dy + state.Q1;
1190  state.form.b = state.form.b * state.G;
1191  state.dy = state.dy / state.x;
1192  state.form.a = state.by * state.by;
1193  state.form.c = state.bx * state.bx;
1194  state.t = state.bx + state.by;
1195  state.form.b -= state.t * state.t;
1196  state.form.b = state.form.b + state.form.a;
1197  state.form.b = state.form.b + state.form.c;
1198  state.form.a -= state.ay * state.dy;
1199  state.form.c -= state.ax * state.dx;
1200  }
1201 #endif
1202  };
1203  } // namespace detail
1204  } // namespace vdf
1205  } // namespace crypto3
1206 } // namespace nil
1207 
1208 #endif // CRYPTO3_VDF_CHIA_FUNCTIONS_HPP