From d50e2b04151b8d213574387bd55c5dc7c8aab818 Mon Sep 17 00:00:00 2001
From: user202729 <25191436+user202729@users.noreply.github.com>
Date: Tue, 30 Dec 2025 03:08:04 +0700
Subject: [PATCH] Implement arb_poly_mullow_convex
---
src/arb_poly.h | 2 +
src/arb_poly/mullow_convex.c | 586 +++++++++++++++++++++++++++++++++++
src/qqbar/atan_pi.c | 1 +
3 files changed, 589 insertions(+)
create mode 100644 src/arb_poly/mullow_convex.c
diff --git a/src/arb_poly.h b/src/arb_poly.h
index bf71f2b744..9aa2d77ed7 100644
--- a/src/arb_poly.h
+++ b/src/arb_poly.h
@@ -254,6 +254,8 @@ void _arb_poly_mullow_block(arb_ptr C,
void arb_poly_mullow_block(arb_poly_t res, const arb_poly_t poly1,
const arb_poly_t poly2, slong len, slong prec);
+void arb_poly_mullow_convex(arb_poly_t c, const arb_poly_t a, const arb_poly_t b, slong prec);
+
void _arb_poly_mullow(arb_ptr C,
arb_srcptr A, slong lenA,
arb_srcptr B, slong lenB, slong n, slong prec);
diff --git a/src/arb_poly/mullow_convex.c b/src/arb_poly/mullow_convex.c
new file mode 100644
index 0000000000..f812982059
--- /dev/null
+++ b/src/arb_poly/mullow_convex.c
@@ -0,0 +1,586 @@
+/*
+ Copyright (C) 2025 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include
+#include "arb_poly.h"
+#include "fmpz_poly.h"
+
+// f must be preallocated with size at least n+1, where n=a.degree().
+// For i=0..n, f[i] will be set to some integer such that 2^f[i] >= a[i].
+// return 1 if succeed, 0 if fail (e.g. exponent overflows slong).
+// somewhat inefficient, could be made more efficient by not handling COEFF_IS_MPZ() case,
+// but this is probably not the bottleneck
+static int _arb_poly_exponent_sequence(slong* f, const arb_poly_t a) {
+ slong n = arb_poly_degree(a);
+ for (slong i = 0; i <= n; ++i) {
+ arb_srcptr c = &a->coeffs[i];
+
+ slong mid_exp = arf_abs_bound_lt_2exp_si(arb_midref(c));
+ if (mid_exp == ARF_PREC_EXACT)
+ return 0;
+
+ slong rad_exp;
+ if (mag_is_special(arb_radref(c))) {
+ rad_exp = -ARF_PREC_EXACT;
+ } else if (mag_is_inf(arb_radref(c))) {
+ return 0;
+ } else {
+ if (fmpz_fits_si(MAG_EXPREF(arb_radref(c))))
+ rad_exp = fmpz_get_si(MAG_EXPREF(arb_radref(c)));
+ else
+ rad_exp = fmpz_sgn(MAG_EXPREF(arb_radref(c))) < 0 ? -ARF_PREC_EXACT : ARF_PREC_EXACT;
+
+ if (rad_exp < -ARF_PREC_EXACT)
+ rad_exp = -ARF_PREC_EXACT;
+ if (rad_exp > ARF_PREC_EXACT)
+ rad_exp = ARF_PREC_EXACT;
+ if (rad_exp == ARF_PREC_EXACT)
+ return 0;
+ }
+
+ f[i] = FLINT_MAX(mid_exp, rad_exp) + 1;
+ }
+
+ return 1;
+}
+
+// hat_f_indices gets assigned the indices f[i] on the upper convex hull.
+// Subsequent entries may get assigned random values.
+// f[0..n] (n inclusive) are considered.
+// hat_f_indices must have preallocated size at least n+1.
+// The returned value is the number of assigned entries.
+// caveat: we don't try to check for integer overflow in the multiplication.
+static size_t _slong_vec_convex_hull(slong* hat_f_indices, const slong* f, size_t n) {
+ size_t hull_size = 1;
+ hat_f_indices[0] = 0;
+ for (size_t i = 1; i <= n; ++i) {
+ while (hull_size >= 2) {
+ size_t j1 = hat_f_indices[hull_size-1];
+ size_t j2 = hat_f_indices[hull_size-2];
+ if ((slong)(i - j2) * (f[j1] - f[j2]) <= (slong)(j1 - j2) * (f[i] - f[j2])) {
+ hull_size--;
+ } else {
+ break;
+ }
+ }
+ hat_f_indices[hull_size++] = i;
+ }
+ return hull_size;
+}
+
+static void _slong_vec_fill_linear_interpolation(slong* f, slong n, int should_round_up) {
+ // f[0] and f[n] are input, mutate f[1..n-1]
+ if (n <= 1) return;
+ if (f[0] == f[n]) {
+ for (slong i = 1; i < n; ++i)
+ f[i] = f[0];
+ } else {
+ slong q = (f[n] - f[0]) / n, r = (f[n] - f[0]) % n;
+ slong v = f[0], remain = 0;
+ if (f[0] < f[n]) {
+ for (slong i = 1; i < n; ++i) {
+ v += q;
+ remain += r;
+ if (remain >= n) { remain -= n; ++v; }
+ f[i] = v + (remain && should_round_up);
+ }
+ } else {
+ for (slong i = 1; i < n; ++i) {
+ v += q;
+ remain += r;
+ if (remain <= -n) { remain += n; --v; }
+ f[i] = v - (remain && !should_round_up);
+ }
+ }
+ }
+}
+
+static void _slong_vec_fill_linear_interpolation_round_down(slong* f, slong n) {
+ _slong_vec_fill_linear_interpolation(f, n, 0);
+}
+
+static void _slong_vec_fill_linear_interpolation_round_up(slong* f, slong n) {
+ _slong_vec_fill_linear_interpolation(f, n, 1);
+}
+
+static void build_ceil_hat(slong *ceil_hat, const slong *f, const slong *hat_f_indices, size_t hat_f_indices_size)
+{
+ FLINT_ASSERT(hat_f_indices[0] == 0);
+ ceil_hat[0] = f[0];
+ slong a = 0;
+ for (size_t t = 1; t < hat_f_indices_size; ++t) {
+ slong b = hat_f_indices[t];
+ ceil_hat[b] = f[b];
+ _slong_vec_fill_linear_interpolation_round_up(ceil_hat + a, b - a);
+ a = b;
+ }
+}
+
+// given f[0..n inclusive], g[0..m inclusive],
+// hat_f_indices[0..hat_f_indices_size-1],
+// hat_g_indices[0..hat_g_indices_size-1],
+// compute the convolution of hat_f and hat_g.
+static void _slong_vec_convex_maxplus_convolve(
+ slong* j_down, // [0..n inclusive]
+ // [i] = j^↓_i i.e. find a witness path of pairs (i, j) such that hat_f[i] + hat_g[j] == hat_h[i+j]
+ // for all such pairs, then j^↓_i is the minimum j value for each i value
+ slong* floor_h, // [0..n+m inclusive]. [k] = floor(hat_h_k)
+ const slong* f, size_t n, const slong* hat_f_indices, size_t hat_f_indices_size,
+ const slong* g, size_t m, const slong* hat_g_indices, size_t hat_g_indices_size
+ ) {
+ size_t a = 0, b = 0;
+ FLINT_ASSERT(hat_f_indices[0] == 0);
+ FLINT_ASSERT(hat_g_indices[0] == 0);
+ floor_h[0] = f[0] + g[0];
+ j_down[0] = 0;
+ while (a < hat_f_indices_size - 1 || b < hat_g_indices_size - 1) {
+ if (a < hat_f_indices_size - 1 && (b >= hat_g_indices_size - 1 ||
+ // note: multiplication overflow not checked!
+ (f[hat_f_indices[a+1]] - f[hat_f_indices[a]]) * (hat_g_indices[b+1] - hat_g_indices[b]) >
+ (g[hat_g_indices[b+1]] - g[hat_g_indices[b]]) * (hat_f_indices[a+1] - hat_f_indices[a]))) {
+ floor_h[hat_f_indices[a+1] + hat_g_indices[b]] = f[hat_f_indices[a+1]] + g[hat_g_indices[b]];
+ slong j = hat_g_indices[b];
+ for (slong i = hat_f_indices[a] + 1; i <= hat_f_indices[a+1]; ++i)
+ j_down[i] = j;
+ _slong_vec_fill_linear_interpolation_round_down(floor_h + hat_f_indices[a] + hat_g_indices[b],
+ hat_f_indices[a+1] - hat_f_indices[a]);
+ ++a;
+ } else {
+ floor_h[hat_f_indices[a] + hat_g_indices[b+1]] = f[hat_f_indices[a]] + g[hat_g_indices[b+1]];
+ _slong_vec_fill_linear_interpolation_round_down(floor_h + hat_f_indices[a] + hat_g_indices[b],
+ hat_g_indices[b+1] - hat_g_indices[b]);
+ ++b;
+ }
+ }
+}
+
+static slong lower_d(const slong* floor_h, const slong* ceil_hat_f, const slong* ceil_hat_g,
+ size_t i, size_t j) { return floor_h[i+j] - ceil_hat_f[i] - ceil_hat_g[j]; }
+
+static int is_below_witness_path(const slong* j_down, size_t i, size_t j) { return j < j_down[i]; }
+static int is_above_witness_path(const slong* j_down, size_t i, size_t j) { return j > j_down[i]; }
+
+static int _arb_poly_should_add_product_early_return(
+ size_t ia, size_t ib, size_t ja, size_t jb,
+ const slong* j_down, const slong* floor_h, const slong* ceil_hat_f, const slong* ceil_hat_g, slong prec
+ ) {
+ if (is_below_witness_path(j_down, ia, jb) && lower_d(floor_h, ceil_hat_f, ceil_hat_g, ia, jb) >= prec) return 1;
+ if (is_above_witness_path(j_down, ib, ja) && lower_d(floor_h, ceil_hat_f, ceil_hat_g, ib, ja) >= prec) return 1;
+ return 0;
+}
+
+static int _arb_poly_are_all_coefficients_nonnegative(const arb_poly_t a)
+{
+ for (slong i = 0; i < arb_poly_length(a); ++i)
+ if (!arb_is_nonnegative(&a->coeffs[i])) return 0;
+ return 1;
+}
+
+// underestimate by at most 1
+// output vectors alloc guaranteed to be at least length of input
+// lo[i] is guaranteed to be <= up[i]
+static void _arb_poly_lower_and_upper_underestimate(fmpz_poly_t lo, fmpz_poly_t up, const arb_poly_t a)
+{
+ slong len = arb_poly_length(a);
+ fmpz_poly_fit_length(lo, len);
+ fmpz_poly_fit_length(up, len);
+
+ arf_t L, U;
+ arf_init(L);
+ arf_init(U);
+ for (slong i = 0; i < len; ++i) {
+ if (fmpz_cmp_si(MAG_EXPREF(arb_radref(&a->coeffs[i])), 0) < 0) {
+ // arb_radref(&a->coeffs[i]) < 2^-1 = 0.5
+ arf_get_fmpz(&lo->coeffs[i], arb_midref(&a->coeffs[i]), ARF_RND_NEAR);
+ fmpz_set(&up->coeffs[i], &lo->coeffs[i]);
+ } else {
+ // in this branch, lo[i] <= up[i] is guaranteed
+ slong tmp = arf_abs_bound_lt_2exp_si(arb_midref(&a->coeffs[i]));
+ arb_get_interval_arf(L, U, &a->coeffs[i], FLINT_MAX(tmp, 0) + 2);
+ arf_get_fmpz(&lo->coeffs[i], L, ARF_RND_UP); // because underestimate
+ arf_get_fmpz(&up->coeffs[i], U, ARF_RND_DOWN);
+ if (fmpz_cmp(&lo->coeffs[i], &up->coeffs[i]) > 0) // slightly inefficient
+ fmpz_set(&lo->coeffs[i], &up->coeffs[i]);
+ }
+ FLINT_ASSERT(fmpz_cmp(&lo->coeffs[i], &up->coeffs[i]) <= 0);
+ }
+ arf_clear(L);
+ arf_clear(U);
+ _fmpz_poly_set_length(lo, len);
+ _fmpz_poly_set_length(up, len);
+ _fmpz_poly_normalise(lo);
+ _fmpz_poly_normalise(up);
+}
+
+// underestimate by at most 0.5
+// output vectors alloc guaranteed to be at least length of input
+static void arb_poly_get_mid_rad_fmpz_poly(fmpz_poly_t mid, fmpz_poly_t rad, const arb_poly_t a)
+{
+ slong len = arb_poly_length(a);
+ fmpz_poly_fit_length(mid, len);
+ fmpz_poly_fit_length(rad, len);
+
+ for (slong i = 0; i < len; ++i) {
+ arb_srcptr c = &a->coeffs[i];
+ arf_get_fmpz(&mid->coeffs[i], arb_midref(c), ARF_RND_NEAR);
+ mag_get_fmpz(&rad->coeffs[i], arb_radref(c));
+ }
+
+ _fmpz_poly_set_length(mid, len);
+ _fmpz_poly_set_length(rad, len);
+ _fmpz_poly_normalise(mid);
+ _fmpz_poly_normalise(rad);
+}
+
+// compare: _arb_poly_addmullow_rad
+static void _arb_poly_convolve_using_fmpz_multiply(arb_poly_t c, const arb_poly_t a, const arb_poly_t b, slong prec)
+{
+ slong len = arb_poly_degree(a) + arb_poly_degree(b) + 1;
+ arb_poly_fit_length(c, len);
+
+ if (_arb_poly_are_all_coefficients_nonnegative(a) && _arb_poly_are_all_coefficients_nonnegative(b)) {
+ // use only 2 multiplications (both are high-precision. Not sure if it's actually faster)
+ fmpz_poly_t a_lower, a_upper, b_lower, b_upper, c_lower, c_upper;
+
+ fmpz_poly_init(a_lower);
+ fmpz_poly_init(a_upper);
+ fmpz_poly_init(b_lower);
+ fmpz_poly_init(b_upper);
+ _arb_poly_lower_and_upper_underestimate(a_lower, a_upper, a);
+ _arb_poly_lower_and_upper_underestimate(b_lower, b_upper, b);
+
+ for (slong i = 0; i < fmpz_poly_length(a_lower); ++i) {
+ FLINT_ASSERT(fmpz_cmp_si(&a_lower->coeffs[i], 0) >= 0);
+ FLINT_ASSERT(fmpz_cmp(&a_lower->coeffs[i], &a_upper->coeffs[i]) <= 0);
+ }
+ for (slong i = 0; i < fmpz_poly_length(b_lower); ++i) {
+ FLINT_ASSERT(fmpz_cmp_si(&b_lower->coeffs[i], 0) >= 0);
+ FLINT_ASSERT(fmpz_cmp(&b_lower->coeffs[i], &b_upper->coeffs[i]) <= 0);
+ }
+
+ fmpz_poly_init(c_lower);
+ fmpz_poly_mul(c_lower, a_lower, b_lower);
+ fmpz_poly_clear(a_lower);
+ fmpz_poly_clear(b_lower);
+
+ fmpz_poly_init(c_upper);
+ fmpz_poly_mul(c_upper, a_upper, b_upper);
+ fmpz_poly_clear(a_upper);
+ fmpz_poly_clear(b_upper);
+
+ arf_t lower_arf, upper_arf;
+ arf_init(lower_arf);
+ arf_init(upper_arf);
+ slong len1 = fmpz_poly_length(c_lower);
+ FLINT_ASSERT(len1 <= fmpz_poly_length(c_upper));
+ for (slong i = 0; i < len1; ++i) {
+ arf_set_fmpz(lower_arf, &c_lower->coeffs[i]);
+ arf_set_fmpz(upper_arf, &c_upper->coeffs[i]);
+ arb_set_interval_arf(&c->coeffs[i], lower_arf, upper_arf, prec);
+ }
+ arf_zero(lower_arf);
+ for (slong i = len1; i < fmpz_poly_length(c_upper); ++i) {
+ arf_set_fmpz(upper_arf, &c_upper->coeffs[i]);
+ arb_set_interval_arf(&c->coeffs[i], lower_arf, upper_arf, prec);
+ }
+ fmpz_poly_clear(c_lower);
+ fmpz_poly_clear(c_upper);
+ arf_clear(lower_arf);
+ arf_clear(upper_arf);
+ } else {
+ // use 1 high-precision multiplication and 2 low-precision multiplications
+ /* (xm + xr)*(ym + yr) = (xm*ym) + (xr*ym + xm*yr + xr*yr)
+ = (xm*ym) + (xm*yr + xr*(ym + yr)) */
+ fmpz_poly_t a_mid, a_rad, b_mid, b_rad, c_mid, c_rad;
+
+ fmpz_poly_init(a_mid);
+ fmpz_poly_init(a_rad);
+ fmpz_poly_init(b_mid);
+ fmpz_poly_init(b_rad);
+ fmpz_poly_init(c_mid);
+ fmpz_poly_init(c_rad);
+
+ arb_poly_get_mid_rad_fmpz_poly(a_mid, a_rad, a);
+ arb_poly_get_mid_rad_fmpz_poly(b_mid, b_rad, b);
+
+ fmpz_poly_mul(c_mid, a_mid, b_mid);
+
+ for (slong i = 0; i < fmpz_poly_length(a_mid); ++i)
+ fmpz_abs(&a_mid->coeffs[i], &a_mid->coeffs[i]);
+ for (slong i = 0; i < fmpz_poly_length(b_mid); ++i)
+ fmpz_abs(&b_mid->coeffs[i], &b_mid->coeffs[i]);
+
+ fmpz_poly_mul(c_rad, a_mid, b_rad);
+ fmpz_poly_add(a_mid, b_mid, b_rad);
+ fmpz_poly_clear(b_rad);
+ fmpz_poly_mul(b_mid, a_mid, a_rad);
+ fmpz_poly_clear(a_mid);
+ fmpz_poly_clear(a_rad);
+ fmpz_poly_add(c_rad, c_rad, b_mid);
+ fmpz_poly_clear(b_mid);
+
+ slong len1 = fmpz_poly_length(c_mid);
+ FLINT_ASSERT(len1 <= arb_poly_length(c));
+ for (slong i = 0; i < len1; ++i)
+ arf_set_fmpz(arb_midref(&c->coeffs[i]), &c_mid->coeffs[i]);
+
+ len1 = fmpz_poly_length(c_rad);
+ FLINT_ASSERT(len1 <= arb_poly_length(c));
+ for (slong i = 0; i < len1; ++i)
+ mag_set_fmpz(arb_radref(&c->coeffs[i]), &c_rad->coeffs[i]);
+
+ fmpz_poly_clear(c_mid);
+ fmpz_poly_clear(c_rad);
+ }
+
+ _arb_poly_set_length(c, len);
+ _arb_poly_normalise(c);
+}
+
+// given as input double a, find a real number numer/denom such that
+// 2^(numer/denom*k) can be computed quickly.
+// Internal implementation detail: Since multiplication by a power of 2 is fast,
+// we want denom to be small, precompute 2^(numer/denom*k) for 0 <= k < denom
+typedef struct nice_powers_struct {
+ slong numer, denom /* always positive */;
+ arb_struct* powers; // size = denom
+} nice_powers_struct;
+
+static double f_prime(size_t i, size_t ja, size_t jb,
+ const slong *ceil_hat_f, const slong *ceil_hat_g, const slong *floor_h, slong prec)
+{
+ slong mind = LONG_MAX;
+ for (size_t j = ja; j <= jb; ++j) {
+ slong d = lower_d(floor_h, ceil_hat_f, ceil_hat_g, i, j);
+ if (d < mind) mind = d;
+ }
+ return (double)(ceil_hat_f[i] - prec + mind - 2);
+}
+
+static double g_prime(size_t j, size_t ia, size_t ib,
+ const slong *ceil_hat_f, const slong *ceil_hat_g, const slong *floor_h, slong prec)
+{
+ slong mind = LONG_MAX;
+ for (size_t i = ia; i <= ib; ++i) {
+ slong d = lower_d(floor_h, ceil_hat_f, ceil_hat_g, i, j);
+ if (d < mind) mind = d;
+ }
+ return (double)(ceil_hat_g[j] - prec + mind - 2);
+}
+
+// max_error may not be exactly satisfied, unlike ball arithmetic
+static void nice_powers_init(nice_powers_struct* data, double a, double max_error, slong prec) {
+ FLINT_ASSERT(max_error > 0);
+ FLINT_ASSERT(ceil(0.5 / max_error) < ((ulong) -1) / 2);
+ data->denom = ceil(0.5 / max_error);
+ data->numer = round(a * data->denom);
+ // TODO use best_rational_fast should be better
+ data->powers = _arb_vec_init(data->denom);
+
+ arb_t two_pow_numer_over_denom, two;
+ arb_init(two_pow_numer_over_denom);
+ arb_set_si(two_pow_numer_over_denom, data->numer);
+ arb_div_si(two_pow_numer_over_denom, two_pow_numer_over_denom, data->denom, prec);
+ arb_init(two);
+ arb_set_ui(two, 2);
+ arb_pow(two_pow_numer_over_denom, two, two_pow_numer_over_denom, prec);
+ arb_clear(two);
+
+ _arb_vec_set_powers(data->powers, two_pow_numer_over_denom, data->denom, prec);
+ arb_clear(two_pow_numer_over_denom);
+}
+
+static void nice_powers_clear(nice_powers_struct* data) {
+ _arb_vec_clear(data->powers, data->denom);
+}
+
+// y = x * 2^(numer/denom*k + extra). Allow aliasing between x and y
+static void nice_powers_mul(arb_t y, nice_powers_struct *data, arb_srcptr x, slong k, slong prec, slong extra)
+{
+ slong q = k / data->denom;
+ slong r = k % data->denom;
+ if (r < 0) { q -= 1; r += data->denom; }
+ arb_mul_2exp_si(y, x, q * data->numer + extra); // because of aliasing, x may be invalid now
+ if (r) {
+ arb_mul(y, y, data->powers + r, prec);
+ }
+}
+
+static void _arb_poly_mullow_add_product_fft(
+ arb_poly_t c,
+ size_t ia, size_t ib, size_t ja, size_t jb,
+ const arb_poly_t a, const arb_poly_t b,
+ const slong* ceil_hat_f, const slong* ceil_hat_g,
+ const slong* floor_h, slong prec) {
+ double fpia /* f'(i_A) */ = f_prime(ia, ja, jb, ceil_hat_f, ceil_hat_g, floor_h, prec);
+ double fpib = f_prime(ib, ja, jb, ceil_hat_f, ceil_hat_g, floor_h, prec);
+ double gpja = g_prime(ja, ia, ib, ceil_hat_f, ceil_hat_g, floor_h, prec);
+ double gpjb = g_prime(jb, ia, ib, ceil_hat_f, ceil_hat_g, floor_h, prec);
+ double lc_a /* lowercase a */ = (fpib - fpia + gpjb - gpja) / (double)(ib - ia + jb - ja);
+ nice_powers_struct data;
+ nice_powers_init(&data, lc_a, 1. / (double)(ib - ia + jb - ja), prec);
+ lc_a = (double)data.numer / (double)data.denom * (data.numer > 0 ? 1 + 1e-15 : 1 - 1e-15);
+ slong lc_b = floor(FLINT_MIN(fpia - lc_a * ia, fpib - lc_a * ib) * (1 - 1e-15)),
+ lc_c = floor(FLINT_MIN(gpja - lc_a * ja, gpjb - lc_a * jb) * (1 - 1e-15));
+ // might be an underestimation, but must not be an overestimation
+
+ arb_poly_t a_prime, b_prime;
+ arb_poly_init2(a_prime, ib - ia + 1);
+ arb_poly_init2(b_prime, jb - ja + 1);
+
+ for (size_t i = ia; i <= ib; ++i)
+ // a_prime[i-ia] = a[i] / 2^(lc_a*i+lc_b)
+ nice_powers_mul(&a_prime->coeffs[i - ia], &data, &a->coeffs[i], -(slong)i, prec, -lc_b);
+ for (size_t j = ja; j <= jb; ++j)
+ // b_prime[j-ja] = b[j] / 2^(lc_a*j+lc_c)
+ nice_powers_mul(&b_prime->coeffs[j - ja], &data, &b->coeffs[j], -(slong)j, prec, -lc_c);
+
+ _arb_poly_set_length(a_prime, ib - ia + 1);
+ _arb_poly_set_length(b_prime, jb - ja + 1);
+ _arb_poly_normalise(a_prime);
+ _arb_poly_normalise(b_prime);
+
+ arb_poly_t c_prime;
+ arb_poly_init2(c_prime, ib - ia + jb - ja + 1);
+ _arb_poly_convolve_using_fmpz_multiply(c_prime, a_prime, b_prime, prec);
+ arb_poly_clear(a_prime);
+ arb_poly_clear(b_prime);
+
+ arb_t tmp;
+ arb_init(tmp);
+ for (size_t k = ia + ja; k <= ib + jb; ++k)
+ {
+ // c[k] += c_prime[k - ia - ja] * 2^(a*k + lc_b + lc_c)
+ nice_powers_mul(tmp, &data, &c_prime->coeffs[k - ia - ja], (slong)k, prec, lc_b + lc_c);
+ arb_add(&c->coeffs[k], &c->coeffs[k], tmp, prec);
+ }
+ arb_clear(tmp);
+ nice_powers_clear(&data);
+ arb_poly_clear(c_prime);
+}
+
+static void _arb_poly_mullow_add_product(
+ arb_poly_t c,
+ const arb_poly_t a, const arb_poly_t b,
+ size_t ia, size_t ib, size_t ja, size_t jb,
+ const slong* j_down, const slong* floor_h, const slong* ceil_hat_f, const slong* ceil_hat_g, slong prec) {
+ FLINT_ASSERT(ia <= ib);
+ FLINT_ASSERT(ja <= jb);
+ if (_arb_poly_should_add_product_early_return(ia, ib, ja, jb, j_down, floor_h, ceil_hat_f, ceil_hat_g, prec)) return;
+ if (is_below_witness_path(j_down, ia, ja) && lower_d(floor_h, ceil_hat_f, ceil_hat_g, ia, ja) >= prec) {
+ size_t jm;
+ // find max jm >= ja such that is_below_witness_path(ia, jm) && lower_d(ia, jm) >= prec
+ jm = jb;
+ while (!(is_below_witness_path(j_down, ia, jm) && lower_d(floor_h, ceil_hat_f, ceil_hat_g, ia, jm) >= prec)) {
+ jm--;
+ }
+ FLINT_ASSERT(_arb_poly_should_add_product_early_return(ia, ib, ja, jm, j_down, floor_h, ceil_hat_f, ceil_hat_g, prec));
+ _arb_poly_mullow_add_product(c, a, b, ia, ib, jm + 1, jb, j_down, floor_h, ceil_hat_f, ceil_hat_g, prec);
+ return;
+ }
+ if (is_above_witness_path(j_down, ib, jb) && lower_d(floor_h, ceil_hat_f, ceil_hat_g, ib, jb) >= prec) {
+ size_t jm;
+ // find min jm <= jb such that is_above_witness_path(ib, jm) && lower_d(ib, jb) >= prec
+ jm = ja;
+ while (!(is_above_witness_path(j_down, ib, jm) && lower_d(floor_h, ceil_hat_f, ceil_hat_g, ib, jm) >= prec)) {
+ jm++;
+ }
+ _arb_poly_mullow_add_product(c, a, b, ia, ib, ja, jm - 1, j_down, floor_h, ceil_hat_f, ceil_hat_g, prec);
+ FLINT_ASSERT(_arb_poly_should_add_product_early_return(ia, ib, jm, jb, j_down, floor_h, ceil_hat_f, ceil_hat_g, prec));
+ return;
+ }
+ const size_t NAIVE_THRESHOLD = 3;
+ if (ib - ia <= NAIVE_THRESHOLD || jb - ja <= NAIVE_THRESHOLD) {
+ arb_t tmp;
+ arb_init(tmp);
+
+ for (size_t i = ia; i <= ib; ++i)
+ for (size_t j = ja; j <= jb; ++j) {
+ // c[i+j] = c[i+j] + a[i] * b[j]
+ arb_mul(tmp, &a->coeffs[i], &b->coeffs[j], prec);
+ arb_add(&c->coeffs[i + j], &c->coeffs[i + j], tmp, prec);
+ }
+
+ arb_clear(tmp);
+ return;
+ }
+ if (FLINT_MAX(lower_d(floor_h, ceil_hat_f, ceil_hat_g, ia, jb), lower_d(floor_h, ceil_hat_f, ceil_hat_g, ib, ja)) <= 3*prec) {
+ _arb_poly_mullow_add_product_fft(c, ia, ib, ja, jb, a, b, ceil_hat_f, ceil_hat_g, floor_h, prec);
+ return;
+ }
+ size_t im = (ia+ib)/2, jm = (ja+jb)/2;
+ _arb_poly_mullow_add_product(c, a, b, ia, im, ja, jm, j_down, floor_h, ceil_hat_f, ceil_hat_g, prec);
+ _arb_poly_mullow_add_product(c, a, b, im+1, ib, ja, jm, j_down, floor_h, ceil_hat_f, ceil_hat_g, prec);
+ _arb_poly_mullow_add_product(c, a, b, ia, im, jm+1, jb, j_down, floor_h, ceil_hat_f, ceil_hat_g, prec);
+ _arb_poly_mullow_add_product(c, a, b, im+1, ib, jm+1, jb, j_down, floor_h, ceil_hat_f, ceil_hat_g, prec);
+}
+
+// main function, compute c = a * b. Despite the name this doesn't yet support specifying output length.
+void arb_poly_mullow_convex(arb_poly_t c, const arb_poly_t a, const arb_poly_t b, slong prec) {
+ if (arb_poly_length(a) == 0 || arb_poly_length(b) == 0) {
+ arb_poly_zero(c);
+ return;
+ }
+
+ slong *f = FLINT_ARRAY_ALLOC(arb_poly_length(a), slong);
+ _arb_poly_exponent_sequence(f, a);
+
+ slong *hat_f_indices = FLINT_ARRAY_ALLOC(arb_poly_length(a), slong);
+ size_t hat_f_indices_size = _slong_vec_convex_hull(hat_f_indices, f, arb_poly_degree(a));
+ slong *ceil_hat_f = FLINT_ARRAY_ALLOC(arb_poly_length(a), slong);
+ build_ceil_hat(ceil_hat_f, f, hat_f_indices, hat_f_indices_size);
+
+ slong *g = FLINT_ARRAY_ALLOC(arb_poly_length(b), slong);
+ _arb_poly_exponent_sequence(g, b);
+
+ slong *hat_g_indices = FLINT_ARRAY_ALLOC(arb_poly_length(b), slong);
+ size_t hat_g_indices_size = _slong_vec_convex_hull(hat_g_indices, g, arb_poly_degree(b));
+ slong *ceil_hat_g = FLINT_ARRAY_ALLOC(arb_poly_length(b), slong);
+ build_ceil_hat(ceil_hat_g, g, hat_g_indices, hat_g_indices_size);
+
+ slong *j_down = FLINT_ARRAY_ALLOC(arb_poly_length(a), slong);
+ slong *floor_h = FLINT_ARRAY_ALLOC(arb_poly_degree(a) + arb_poly_degree(b) + 1, slong);
+
+ _slong_vec_convex_maxplus_convolve(j_down, floor_h,
+ f, arb_poly_degree(a), hat_f_indices, hat_f_indices_size,
+ g, arb_poly_degree(b), hat_g_indices, hat_g_indices_size);
+
+ slong len = arb_poly_degree(a) + arb_poly_degree(b) + 1;
+ arb_poly_fit_length(c, len);
+ {
+ mag_t t, tmp;
+ mag_init(t);
+ mag_init(tmp);
+ mag_set_ui(t, (ulong) FLINT_MIN(arb_poly_length(a), arb_poly_length(b)));
+ for (size_t k = 0; k < (size_t)len; ++k) {
+ arb_zero(&c->coeffs[k]);
+ mag_set(tmp, t);
+ mag_mul_2exp_si(tmp, tmp, floor_h[k] + 1 - prec);
+ arb_add_error_mag(&c->coeffs[k], tmp);
+ }
+ mag_clear(t);
+ mag_clear(tmp);
+ }
+
+ _arb_poly_mullow_add_product(c, a, b, 0, arb_poly_degree(a), 0, arb_poly_degree(b), j_down, floor_h, ceil_hat_f, ceil_hat_g, prec);
+
+ _arb_poly_set_length(c, len);
+ _arb_poly_normalise(c);
+
+ flint_free(f);
+ flint_free(hat_f_indices);
+ flint_free(g);
+ flint_free(hat_g_indices);
+ flint_free(j_down);
+ flint_free(floor_h);
+ flint_free(ceil_hat_f);
+ flint_free(ceil_hat_g);
+}
diff --git a/src/qqbar/atan_pi.c b/src/qqbar/atan_pi.c
index 26e5a0b71c..eaf3d563e2 100644
--- a/src/qqbar/atan_pi.c
+++ b/src/qqbar/atan_pi.c
@@ -14,6 +14,7 @@
#include "qqbar.h"
#include "qqbar/impl.h"
+/* Find a rational p/q close to x with |p/q-x| < 1/N and q <= N. */
void
best_rational_fast(slong * p, ulong * q, double x, slong N)
{