cvxgrp · bodono · May 14, 2026 · May 15, 2026 · May 15, 2026 · May 22, 2026
diff --git a/README.md b/README.md
@@ -115,18 +115,20 @@ examples requires installing `matplotlib` (`pip install matplotlib`).
 ## Quickstart (C)
 
 ```c
+#include <math.h>      /* INFINITY */
 #include "aa.h"
 
-AaWork *a = aa_init(n,     /* dim              */
-                    10,    /* mem              */
-                    10,    /* min_len          */
-                    1,     /* type1            */
-                    1e-8,  /* regularization   */
-                    1.0,   /* relaxation       */
-                    2.0,   /* safeguard_factor */
-                    1e10,  /* max_weight_norm  */
-                    5,     /* ir_max_steps     */
-                    0);    /* verbosity        */
+AaWork *a = aa_init(n,        /* dim              */
+                    10,       /* mem              */
+                    10,       /* min_len          */
+                    1,        /* type1            */
+                    1e-8,     /* regularization   */
+                    1.0,      /* relaxation       */
+                    2.0,      /* safeguard_factor */
+                    INFINITY, /* max_weight_norm  */
+                    INFINITY, /* trust_factor     */
+                    5,        /* ir_max_steps     */
+                    0);       /* verbosity        */
 
 for (int i = 0; i < N; i++) {
     if (i > 0) aa_apply(x, x_prev, a);
@@ -149,10 +151,11 @@ See [`tests/c/gd.c`](tests/c/gd.c) for a complete runnable example
 | `mem`              | Number of past iterates to look back                                                              | 5 – 20                                  |
 | `min_len`          | Minimum buffered residual pairs before AA begins extrapolating. `min_len = mem` waits for the memory to fill (stable default); `min_len = 1` starts extrapolating immediately. Must be ≥ 1 when `mem > 0`; clamped down when it exceeds `min(mem, dim)`. | `mem`                                   |
 | `type1`            | Type-I if true, Type-II otherwise                                                                 | see notes below                         |
-| `regularization`   | Tikhonov regularization on the AA least-squares system. `> 0`: scaled by `‖A‖_F·‖Y‖_F`. `< 0`: pinned absolute `-regularization` (no scaling). `= 0`: off. | Type-I: `1e-8`, Type-II: `1e-12`        |
+| `regularization`   | Tikhonov regularization on the AA least-squares system. `> 0`: scaled by `‖S‖_F·‖Y‖_F` (same for Type-I and Type-II — historically Type-II used `‖Y‖²` which decays quadratically as `Y→0` and underflows on slow-contraction maps). `< 0`: pinned absolute `-regularization` (no scaling). `= 0`: off. | Type-I: `1e-8`, Type-II: `1e-12`        |
 | `relaxation`       | Mixing parameter in `[0, 2]`; `1.0` is vanilla AA                                                 | `1.0`                                   |
 | `safeguard_factor` | Multiplier on the residual-growth ratio beyond which the AA step is rejected. Larger = more aggressive. | `2.0`                                   |
-| `max_weight_norm`  | Upper bound on the norm of the AA combination weights; rejects numerically unstable steps         | `1e6` – `1e10`                          |
+| `max_weight_norm`  | Hard cap: reject solves with `‖γ‖₂ ≥ max_weight_norm`. Pass `INFINITY` to disable. | `1e6` – `1e10` or `INFINITY`            |
+| `trust_factor`     | Opt-in trust region + adaptive regularization (see "Trust-region mode" below). Pass `INFINITY` to disable; pass a finite positive value (typically `10`) to enable. | `INFINITY` (off), or `~10` for ADMM/DRS |
 | `ir_max_steps`     | Cap on iterative-refinement passes for the weight solve. The loop stops early when refinement stalls, so this is an upper bound; raise for ill-conditioned problems, lower for tighter cost bounds. | `5`                                     |
 | `verbosity`        | `0` silent, higher values print progress and diagnostics                                          | `0`                                     |
 
@@ -161,19 +164,56 @@ problems but can be sensitive; Type-II is more robust. If one fails, try the
 other. Both tolerate nonsmooth `F` thanks to the safeguard, though
 convergence guarantees in that regime are stronger for Type-I (see the paper).
 
+### Trust-region mode (opt-in)
+
+Setting `trust_factor` to a finite positive value (typically `10`) turns on
+two coupled mechanisms that target a failure mode seen on slow-contraction
+maps (ADMM / DRS / proximal splitting): AA's least-squares system underflows
+near convergence, the weight vector `γ` blows up, and the safeguard's
+monotone test (`‖g_new‖ ≤ ‖g_old‖`) is too weak to catch the resulting
+"creep" — each step marginally reduces the residual without approaching the
+fixed point.
+
+1. **Trust region.** Each AA solve rejects the step if `‖D γ‖₂ > trust_factor · ‖g‖₂`,
+   where `D` is the matrix of past `Δf` columns. This bounds the iterate
+   displacement relative to the current residual and catches the
+   "γ-passes-the-weight-cap-but-`Dγ`-is-huge" failure mode directly.
+
+2. **Adaptive regularization.** The Tikhonov term `r` is replaced by a
+   self-tuning value that starts large (so initial `γ ≈ 0`, i.e. AA ≈ `F`),
+   shrinks by `0.9×` on each safeguard accept (let AA do more), and grows
+   by `10×` on each rejection (back off toward `F`). The two mechanisms
+   feed each other: a trust trip bumps `r`, the next solve produces a
+   smaller `γ`, the trust check usually passes.
+
+`trust_factor = INFINITY` (the default) disables both mechanisms and the
+library uses the standard `ε · ‖S‖_F · ‖Y‖_F` regularization. The two paths
+are independent — turning trust-region mode on only matters for the kind of
+problem above.
+
+When to set `trust_factor`:
+
+| Situation                                                         | Recommendation             |
+|-------------------------------------------------------------------|----------------------------|
+| Gradient descent, prox iterations on well-scaled problems         | leave `INFINITY` (default) |
+| Operator-splitting solvers (ADMM, PDHG, Douglas-Rachford)         | try `trust_factor = 10`    |
+| Anything where AA produces `‖γ‖₂` in the hundreds but `‖g‖` doesn't keep dropping | try `trust_factor = 10`    |
+| Newton-like ill-conditioned problems where `γ` is legitimately huge | leave `INFINITY` (default) |
+
 ## Python API
 
 ```python
 aa.AndersonAccelerator(
     dim,
     mem,
     *,
-    min_len=None,          # defaults to min(mem, dim)
+    min_len=None,           # defaults to min(mem, dim)
     type1=False,
     regularization=1e-12,
     relaxation=1.0,
     safeguard_factor=1.0,
-    max_weight_norm=1e6,
+    max_weight_norm=math.inf,
+    trust_factor=math.inf,  # see "Trust-region mode" above; ~10 for ADMM/DRS
     ir_max_steps=5,
     verbosity=0,
 )
@@ -187,7 +227,7 @@ C-contiguous, writeable `float64` numpy arrays of length `dim`.
 | `apply(f, x)`           | Call once per iteration (skip the first). `f` holds the most recent map output `F(x)`. Overwrites `f` in place with the AA-extrapolated point.    |
 | `safeguard(f_new, x_new)` | Call after running your map on the AA extrapolate. If AA did not make progress, reverts both arrays to the last-known-good state. Returns `0` on accept, `-1` on reject. |
 | `reset()`               | Clears AA state (equivalent to re-initializing) without reallocating. Lifetime `stats` counters are NOT cleared.                                 |
-| `stats`                 | Read-only property returning a dict of lifetime counters: `iter`, `n_accept`, `n_reject_lapack`, `n_reject_rank0`, `n_reject_nonfinite`, `n_reject_weight_cap`, `n_safeguard_reject`, `last_rank`, `last_aa_norm` (NaN until the first solve), `last_regularization`. Useful for diagnosing when AA isn't helping — rising `n_reject_weight_cap` or `n_reject_nonfinite` points at `max_weight_norm` / `regularization` tuning; rising `n_safeguard_reject` points at `safeguard_factor` / `mem`; `n_reject_rank0` is normal near convergence (memory is numerically zero). |
+| `stats`                 | Read-only property returning a dict of lifetime counters: `iter`, `n_accept`, `n_reject_lapack`, `n_reject_rank0`, `n_reject_nonfinite`, `n_reject_weight_cap`, `n_safeguard_reject`, `last_rank`, `last_aa_norm` (NaN until the first solve), `last_regularization`. Useful for diagnosing when AA isn't helping — high `n_safeguard_reject` with low `last_regularization` and high `last_aa_norm` on a slow-contraction problem suggests trying `trust_factor = 10`; rising `n_reject_weight_cap` or `n_reject_nonfinite` points at `max_weight_norm` / `regularization` tuning; rising `n_safeguard_reject` points at `safeguard_factor` / `mem`; `n_reject_rank0` is normal near convergence (memory is numerically zero). |
 
 ## C API
 
@@ -198,7 +238,8 @@ Python API exactly:
 AaWork *aa_init(aa_int dim, aa_int mem, aa_int min_len, aa_int type1,
                 aa_float regularization, aa_float relaxation,
                 aa_float safeguard_factor, aa_float max_weight_norm,
-                aa_int ir_max_steps, aa_int verbosity);
+                aa_float trust_factor, aa_int ir_max_steps,
+                aa_int verbosity);
 
 aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a);
 aa_int   aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a);

diff --git a/include/aa.h b/include/aa.h
@@ -49,7 +49,21 @@ typedef struct ACCEL_WORK AaWork;
  * @param relaxation        finite float \in [0,2], mixing parameter (1.0 is vanilla)
  * @param safeguard_factor  finite nonnegative factor that controls safeguarding checks
  *                          larger is more aggressive but less stable
- * @param max_weight_norm   finite positive float, maximum norm of AA weights
+ * @param max_weight_norm   positive float, maximum L2 norm of AA weights γ;
+ *                          the solve is rejected when ||γ||_2 >= max_weight_norm.
+ *                          Pass INFINITY to disable this hard cap.
+ * @param trust_factor      positive float, opt-in trust region + adaptive
+ *                          regularization. When finite, the solve rejects
+ *                          steps with ||D γ||_2 > trust_factor * ||g||_2 and
+ *                          AA's regularization adapts via accept/reject
+ *                          feedback (start large so AA ≈ DRS; shrink by 0.9×
+ *                          on each safeguard accept; grow by 10× on any
+ *                          rejection). Useful for slow-contraction maps
+ *                          (ADMM/DRS) where the LS basis can produce large
+ *                          but unproductive γ that the safeguard alone
+ *                          fails to catch. Pass INFINITY to disable — the
+ *                          original ε·||S||·||Y|| regularization path is
+ *                          then used unchanged.
  * @param ir_max_steps      max iterative refinement passes on the γ solve.
  *                          0 disables IR. Each step is O(mem²) and loops
  *                          until the correction stops contracting, so on
@@ -65,7 +79,8 @@ typedef struct ACCEL_WORK AaWork;
 AaWork *aa_init(aa_int dim, aa_int mem, aa_int min_len, aa_int type1,
                 aa_float regularization, aa_float relaxation,
                 aa_float safeguard_factor, aa_float max_weight_norm,
-                aa_int ir_max_steps, aa_int verbosity);
+                aa_float trust_factor, aa_int ir_max_steps,
+                aa_int verbosity);
 
 /**
  * Apply Anderson Acceleration. The usage pattern should be as follows:

diff --git a/python/aapy.pyx b/python/aapy.pyx
@@ -19,7 +19,7 @@ cdef extern from "../include/aa.h":
         int last_rank
         double last_aa_norm
         double last_regularization
-    AaWork *aa_init(int, int, int, int, double, double, double, double, int, int)
+    AaWork *aa_init(int, int, int, int, double, double, double, double, double, int, int)
     double aa_apply(double*, const double*, AaWork*)
     int aa_safeguard(double*, double*, AaWork*)
     void aa_reset(AaWork*)
@@ -32,7 +32,8 @@ cdef class AndersonAccelerator(object):
 
     def __cinit__(self, dim, mem, *, min_len=None, type1=False,
                   regularization=1e-12, relaxation=1.0, safeguard_factor=1.0,
-                  max_weight_norm=1e6, ir_max_steps=5, verbosity=0):
+                  max_weight_norm=math.inf, trust_factor=math.inf,
+                  ir_max_steps=5, verbosity=0):
         if dim <= 0:
             raise ValueError("dim must be positive")
         if mem < 0:
@@ -47,8 +48,11 @@ cdef class AndersonAccelerator(object):
             raise ValueError("relaxation must be finite and in [0, 2]")
         if not math.isfinite(safeguard_factor) or safeguard_factor < 0:
             raise ValueError("safeguard_factor must be finite and non-negative")
-        if not math.isfinite(max_weight_norm) or max_weight_norm <= 0:
-            raise ValueError("max_weight_norm must be finite and positive")
+        # max_weight_norm and trust_factor: math.inf disables the cap.
+        if math.isnan(max_weight_norm) or max_weight_norm <= 0:
+            raise ValueError("max_weight_norm must be positive (inf for no cap)")
+        if math.isnan(trust_factor) or trust_factor <= 0:
+            raise ValueError("trust_factor must be positive (inf for off)")
         if ir_max_steps < 0:
             raise ValueError("ir_max_steps must be non-negative")
         # min_len: minimum # of residual pairs before AA starts extrapolating.
@@ -61,7 +65,7 @@ cdef class AndersonAccelerator(object):
             raise ValueError("min_len must be >= 1 when mem > 0")
         self._wrk = aa_init(dim, mem, min_len, type1, regularization,
                             relaxation, safeguard_factor, max_weight_norm,
-                            ir_max_steps, verbosity)
+                            trust_factor, ir_max_steps, verbosity)
         if self._wrk is NULL:
             raise MemoryError("aa_init failed")
         self._dim = dim