From a72fd0b4e4898690c6143115817d864efa3909da Mon Sep 17 00:00:00 2001
From: "Marc A. Marti-Renom" <martirenom@gmail.com>
Date: Wed, 31 Jul 2024 16:49:11 +0200
Subject: [PATCH 1/4] Added permutations_array to Moran_Local class This allows
 users to define a custom permutations array for the Moran_Local class to be
 used in crand. This is useful for custom applications.

---
 esda/crand.py | 12 ++++++++++--
 esda/moran.py |  4 ++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/esda/crand.py b/esda/crand.py
index 523cfbd4..18e389d3 100644
--- a/esda/crand.py
+++ b/esda/crand.py
@@ -69,6 +69,7 @@ def crand(
     w,
     observed,
     permutations,
+    permutations_array,
     keep,
     n_jobs,
     stat_func,
@@ -91,6 +92,8 @@ def crand(
         (N,) array with observed values
     permutations : int
         Number of permutations for conditional randomisation
+    permutations_array : ndarray
+        (permutations, ) array with indices of permuted
     keep : Boolean
         If True, store simulation; else do not return randomised statistics
     n_jobs : int
@@ -170,8 +173,13 @@ def crand(
     # self neighbor, since conditional randomization conditions on site i.
     cardinalities = np.array((adj_matrix != 0).sum(1)).flatten()
     max_card = cardinalities.max()
-    permuted_ids = vec_permutations(max_card, n, permutations, seed)
-
+    if permutations_array is None:
+        # Random permutation array
+        permuted_ids = vec_permutations(max_card, n, permutations, seed)
+    else:
+        # User defined permutation array
+        permuted_ids = permutations_array
+    
     if n_jobs != 1:
         try:
             import joblib  # noqa: F401
diff --git a/esda/moran.py b/esda/moran.py
index 79a32c6b..3055d437 100644
--- a/esda/moran.py
+++ b/esda/moran.py
@@ -894,6 +894,8 @@ class Moran_Local:  # noqa: N801
     permutations : int
         number of random permutations for calculation of pseudo
         p_values
+    permutations_array: array
+        user specified permutations
     geoda_quads : boolean
         (default=False)
         If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
@@ -1027,6 +1029,7 @@ def __init__(
         w,
         transformation="r",
         permutations=PERMUTATIONS,
+        permutations_array=None,
         geoda_quads=False,
         n_jobs=1,
         keep_simulations=True,
@@ -1064,6 +1067,7 @@ def __init__(
                 w,
                 self.Is,
                 permutations,
+                permutations_array,
                 keep_simulations,
                 n_jobs=n_jobs,
                 stat_func=_moran_local_crand,

From 30d2d4822237bffc190b588298c36f7ee625281d Mon Sep 17 00:00:00 2001
From: "Marc A. Marti-Renom" <martirenom@gmail.com>
Date: Fri, 2 Aug 2024 16:22:37 +0200
Subject: [PATCH 2/4] =?UTF-8?q?test=20for=20permutations=5Farray=20branch.?=
 =?UTF-8?q?=C3=A7=20Added=20new=20tests=20for=20the=20permutations=5Farray?=
 =?UTF-8?q?=20argument=20in=20Moran=5FLocal=20and=20crand.=20Fixed=20some?=
 =?UTF-8?q?=20test=20that=20were=20broken=20due=20to=20the=20new=20argumen?=
 =?UTF-8?q?t.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 esda/crand.py             |  8 ++++++++
 esda/join_counts_local.py |  1 +
 esda/moran.py             |  2 ++
 esda/tests/test_moran.py  | 25 +++++++++++++++++++++++++
 4 files changed, 36 insertions(+)

diff --git a/esda/crand.py b/esda/crand.py
index 18e389d3..aa67c480 100644
--- a/esda/crand.py
+++ b/esda/crand.py
@@ -173,12 +173,20 @@ def crand(
     # self neighbor, since conditional randomization conditions on site i.
     cardinalities = np.array((adj_matrix != 0).sum(1)).flatten()
     max_card = cardinalities.max()
+    
     if permutations_array is None:
         # Random permutation array
         permuted_ids = vec_permutations(max_card, n, permutations, seed)
     else:
         # User defined permutation array
         permuted_ids = permutations_array
+        if permuted_ids.shape[0] != permutations:
+            permutations = permuted_ids.shape[0]
+            warnings.warn(
+                f"Number of permutations has been adjusted to match the length of the "
+                f"permutations array. New value of 'permutations' is {permutations}.",
+                stacklevel=2,
+            )
     
     if n_jobs != 1:
         try:
diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py
index 86277958..0ede9176 100644
--- a/esda/join_counts_local.py
+++ b/esda/join_counts_local.py
@@ -138,6 +138,7 @@ def fit(self, y, n_jobs=1, permutations=999):
                 w=self.w,
                 observed=self.LJC,
                 permutations=permutations,
+                permutations_array=None,
                 keep=keep_simulations,
                 n_jobs=n_jobs,
                 stat_func=_ljc_uni,
diff --git a/esda/moran.py b/esda/moran.py
index 3055d437..1ae7608c 100644
--- a/esda/moran.py
+++ b/esda/moran.py
@@ -1410,6 +1410,7 @@ def __init__(
                 w,
                 self.Is,
                 permutations,
+                None,
                 keep_simulations,
                 n_jobs=n_jobs,
                 stat_func=_moran_local_bv_crand,
@@ -1654,6 +1655,7 @@ def __init__(
             w,
             transformation=transformation,
             permutations=permutations,
+            permutations_array=None,
             geoda_quads=geoda_quads,
             n_jobs=n_jobs,
             keep_simulations=keep_simulations,
diff --git a/esda/tests/test_moran.py b/esda/tests/test_moran.py
index c35d4009..42a09967 100644
--- a/esda/tests/test_moran.py
+++ b/esda/tests/test_moran.py
@@ -162,6 +162,7 @@ def setup_method(self):
         f = libpysal.io.open(libpysal.examples.get_path("desmith.txt"))
         self.y = np.array(f.by_col["z"])
 
+
     @parametrize_desmith
     def test_Moran_Local(self, w):
         lm = moran.Moran_Local(
@@ -174,6 +175,30 @@ def test_Moran_Local(self, w):
         )
         np.testing.assert_allclose(lm.z_sim[0], -0.6990291160835514)
         np.testing.assert_allclose(lm.p_z_sim[0], 0.24226691753791396)
+        
+    @parametrize_desmith
+    def test_Moran_Local_custom_perms(self, w):
+        np.random.seed(SEED)
+        cardinalities = np.array((w.sparse != 0).sum(1)).flatten()
+        max_card = cardinalities.max()
+        original_array = np.arange(len(self.y))
+        permutations_array = np.zeros((99, max_card), dtype=np.int32)
+
+        for i in range(99):
+            random_number = np.random.randint(0, len(self.y) - max_card)
+            permutations_array[i] = original_array[random_number:random_number + max_card]
+
+        lm = moran.Moran_Local(
+            self.y,
+            w,
+            transformation="r",
+            permutations=99,
+            permutations_array=permutations_array,
+            keep_simulations=True,
+            seed=SEED,
+        )
+        np.testing.assert_allclose(lm.z_sim[0], -0.49229215590070813)
+        np.testing.assert_allclose(lm.p_z_sim[0], 0.311256412279757)
 
     @parametrize_sac
     def test_Moran_Local_labels(self, w):

From 6e712bb6b012eee25e6ba9ec12cc309b4d825787 Mon Sep 17 00:00:00 2001
From: Levi John Wolf <levi.john.wolf@gmail.com>
Date: Thu, 15 Aug 2024 10:41:39 +0100
Subject: [PATCH 3/4] move to overloading permutations argument

---
 esda/crand.py             | 32 ++++++++++++++++++++++++--------
 esda/join_counts_local.py |  1 -
 esda/moran.py             |  5 -----
 esda/tests/test_moran.py  |  3 +--
 4 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/esda/crand.py b/esda/crand.py
index aa67c480..cba54ce7 100644
--- a/esda/crand.py
+++ b/esda/crand.py
@@ -69,7 +69,6 @@ def crand(
     w,
     observed,
     permutations,
-    permutations_array,
     keep,
     n_jobs,
     stat_func,
@@ -92,8 +91,6 @@ def crand(
         (N,) array with observed values
     permutations : int
         Number of permutations for conditional randomisation
-    permutations_array : ndarray
-        (permutations, ) array with indices of permuted
     keep : Boolean
         If True, store simulation; else do not return randomised statistics
     n_jobs : int
@@ -173,13 +170,17 @@ def crand(
     # self neighbor, since conditional randomization conditions on site i.
     cardinalities = np.array((adj_matrix != 0).sum(1)).flatten()
     max_card = cardinalities.max()
-    
-    if permutations_array is None:
+
+    if np.ndim(permutations) == 0:
         # Random permutation array
+        if int(permutations) != permutations:
+            raise ValueError(
+            f"An integer number of permutations is required, but {permutations} was provided."
+            )
         permuted_ids = vec_permutations(max_card, n, permutations, seed)
-    else:
+    elif np.ndim(permutations) == 2:
         # User defined permutation array
-        permuted_ids = permutations_array
+        permuted_ids = permutations
         if permuted_ids.shape[0] != permutations:
             permutations = permuted_ids.shape[0]
             warnings.warn(
@@ -187,7 +188,22 @@ def crand(
                 f"permutations array. New value of 'permutations' is {permutations}.",
                 stacklevel=2,
             )
-    
+        if permuted_ids.shape[1] < max_card:
+            raise ValueError(
+            f"The `permutations` provided were shape {permuted_ids.shape}"
+            f", but must be ({permutations, max_card}) in order to supply"
+            f" enough possible neighbors to shuffle every observation."
+            f" Consider supplying a wider permutation matrix, with"
+            f" {max_card-permuted_ids.shape[1]} more columns."
+            )
+    else:
+        raise ValueError(
+            f"The `permutations` argument must be either an integer, or a"
+            f" two-dimensional numpy array of shape (p,k), where p is the"
+            f" number of permutations to run and k is the largest amount of"
+            f" shuffled pairs that must be considered at a given site, but"
+            f" {permutations} was provided."
+            )    
     if n_jobs != 1:
         try:
             import joblib  # noqa: F401
diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py
index 0ede9176..86277958 100644
--- a/esda/join_counts_local.py
+++ b/esda/join_counts_local.py
@@ -138,7 +138,6 @@ def fit(self, y, n_jobs=1, permutations=999):
                 w=self.w,
                 observed=self.LJC,
                 permutations=permutations,
-                permutations_array=None,
                 keep=keep_simulations,
                 n_jobs=n_jobs,
                 stat_func=_ljc_uni,
diff --git a/esda/moran.py b/esda/moran.py
index 1ae7608c..62a7e724 100644
--- a/esda/moran.py
+++ b/esda/moran.py
@@ -894,8 +894,6 @@ class Moran_Local:  # noqa: N801
     permutations : int
         number of random permutations for calculation of pseudo
         p_values
-    permutations_array: array
-        user specified permutations
     geoda_quads : boolean
         (default=False)
         If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
@@ -1029,7 +1027,6 @@ def __init__(
         w,
         transformation="r",
         permutations=PERMUTATIONS,
-        permutations_array=None,
         geoda_quads=False,
         n_jobs=1,
         keep_simulations=True,
@@ -1067,7 +1064,6 @@ def __init__(
                 w,
                 self.Is,
                 permutations,
-                permutations_array,
                 keep_simulations,
                 n_jobs=n_jobs,
                 stat_func=_moran_local_crand,
@@ -1655,7 +1651,6 @@ def __init__(
             w,
             transformation=transformation,
             permutations=permutations,
-            permutations_array=None,
             geoda_quads=geoda_quads,
             n_jobs=n_jobs,
             keep_simulations=keep_simulations,
diff --git a/esda/tests/test_moran.py b/esda/tests/test_moran.py
index 42a09967..f7d65ba7 100644
--- a/esda/tests/test_moran.py
+++ b/esda/tests/test_moran.py
@@ -192,8 +192,7 @@ def test_Moran_Local_custom_perms(self, w):
             self.y,
             w,
             transformation="r",
-            permutations=99,
-            permutations_array=permutations_array,
+            permutations=permutations_array,
             keep_simulations=True,
             seed=SEED,
         )

From 08a55f53b59219b1eaffb9759efd0b62febdccf2 Mon Sep 17 00:00:00 2001
From: Levi John Wolf <levi.john.wolf@gmail.com>
Date: Thu, 15 Aug 2024 10:49:19 +0100
Subject: [PATCH 4/4] add checks, add docs to crand-using classes

---
 esda/crand.py                | 11 +++++++++--
 esda/geary_local.py          | 13 +++++++++----
 esda/getisord.py             | 12 +++++++++---
 esda/join_counts_local.py    | 12 +++++++++---
 esda/join_counts_local_bv.py | 11 +++++++++--
 esda/join_counts_local_mv.py | 11 +++++++++--
 esda/moran.py                | 36 +++++++++++++++++++++++++++---------
 7 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/esda/crand.py b/esda/crand.py
index cba54ce7..235dbc3b 100644
--- a/esda/crand.py
+++ b/esda/crand.py
@@ -89,8 +89,15 @@ def crand(
         Spatial weights object
     observed : ndarray
         (N,) array with observed values
-    permutations : int
-        Number of permutations for conditional randomisation
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     keep : Boolean
         If True, store simulation; else do not return randomised statistics
     n_jobs : int
diff --git a/esda/geary_local.py b/esda/geary_local.py
index 584bd55e..7543307a 100644
--- a/esda/geary_local.py
+++ b/esda/geary_local.py
@@ -42,10 +42,15 @@ def __init__(
                            (default=0.05)
                            Default significance threshold used for
                            creation of labels groups.
-        permutations     : int
-                           (default=999)
-                           number of random permutations for calculation
-                           of pseudo p_values
+        permutations : int, np.ndarray
+            Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+            conditional random null hypothesis for each site. Permutations
+            might be specified as an array of indices if the user needs to add
+            structure to this conditional permutation null hypothesis. Common
+            reasons to do this include exchangeability violations, which might
+            then require us to shuffle observations within (but not between)
+            groups, or linearity constraints, which may require certain
+            sequences of observation relationships to be preserved.
         n_jobs           : int
                            (default=1)
                            Number of cores to be used in the conditional
diff --git a/esda/getisord.py b/esda/getisord.py
index 3862468e..a89ed102 100644
--- a/esda/getisord.py
+++ b/esda/getisord.py
@@ -257,9 +257,15 @@ class G_Local:  # noqa: N801
         spatial weights instance as W or Graph aligned with y
     transform : {'R', 'B'}
         the type of w, either 'B' (binary) or 'R' (row-standardized)
-    permutations : int
-        the number of random permutations for calculating
-        pseudo p values
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     star : boolean or float
         whether or not to include focal observation in sums (default: False)
         if the row-transformed weight is provided, then this is the default
diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py
index 86277958..da0d2fab 100644
--- a/esda/join_counts_local.py
+++ b/esda/join_counts_local.py
@@ -30,9 +30,15 @@ def __init__(
         ----------
         connectivity : W | Graph
             spatial weights instance as W or Graph aligned with y
-        permutations : int
-            number of random permutations for calculation of pseudo
-            p_values
+        permutations : int, np.ndarray
+            Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+            conditional random null hypothesis for each site. Permutations
+            might be specified as an array of indices if the user needs to add
+            structure to this conditional permutation null hypothesis. Common
+            reasons to do this include exchangeability violations, which might
+            then require us to shuffle observations within (but not between)
+            groups, or linearity constraints, which may require certain
+            sequences of observation relationships to be preserved.
         n_jobs : int
             Number of cores to be used in the conditional randomisation. If -1,
             all available cores are used.
diff --git a/esda/join_counts_local_bv.py b/esda/join_counts_local_bv.py
index 69979b8c..88a21996 100644
--- a/esda/join_counts_local_bv.py
+++ b/esda/join_counts_local_bv.py
@@ -30,8 +30,15 @@ def __init__(
         ----------
         connectivity : W | Graph
             spatial weights instance as W or Graph aligned with y
-        permutations : int
-            number of random permutations for calculation of pseudo p_values
+        permutations : int, np.ndarray
+            Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+            conditional random null hypothesis for each site. Permutations
+            might be specified as an array of indices if the user needs to add
+            structure to this conditional permutation null hypothesis. Common
+            reasons to do this include exchangeability violations, which might
+            then require us to shuffle observations within (but not between)
+            groups, or linearity constraints, which may require certain
+            sequences of observation relationships to be preserved.
         n_jobs : int
             Number of cores to be used in the conditional randomisation.
             If -1, all available cores are used.
diff --git a/esda/join_counts_local_mv.py b/esda/join_counts_local_mv.py
index e9e94f25..fab44daf 100644
--- a/esda/join_counts_local_mv.py
+++ b/esda/join_counts_local_mv.py
@@ -30,8 +30,15 @@ def __init__(
         ----------
         connectivity : W | Graph
             spatial weights instance as W or Graph aligned with y
-        permutations : int
-            number of random permutations for calculation of pseudo p_values
+        permutations : int, np.ndarray
+            Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+            conditional random null hypothesis for each site. Permutations
+            might be specified as an array of indices if the user needs to add
+            structure to this conditional permutation null hypothesis. Common
+            reasons to do this include exchangeability violations, which might
+            then require us to shuffle observations within (but not between)
+            groups, or linearity constraints, which may require certain
+            sequences of observation relationships to be preserved.
         n_jobs : int
             Number of cores to be used in the conditional randomisation. If -1,
             all available cores are used.
diff --git a/esda/moran.py b/esda/moran.py
index 62a7e724..84deaa8c 100644
--- a/esda/moran.py
+++ b/esda/moran.py
@@ -891,9 +891,15 @@ class Moran_Local:  # noqa: N801
          "D": doubly-standardized,
          "U": untransformed (general weights),
          "V": variance-stabilizing.
-    permutations : int
-        number of random permutations for calculation of pseudo
-        p_values
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     geoda_quads : boolean
         (default=False)
         If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
@@ -1265,9 +1271,15 @@ class Moran_Local_BV:  # noqa: N801
         "D": doubly-standardized,
         "U": untransformed (general weights),
         "V": variance-stabilizing.
-    permutations   : int
-        number of random permutations for calculation of pseudo
-        p_values
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     geoda_quads    : boolean
         (default=False)
         If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
@@ -1536,9 +1548,15 @@ class Moran_Local_Rate(Moran_Local):  # noqa: N801
         "D": doubly-standardized,
         "U": untransformed (general weights),
         "V": variance-stabilizing.
-    permutations : int
-        number of random permutations for calculation of pseudo
-        p_values
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     geoda_quads : boolean
          (default=False)
          If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4