From a72fd0b4e4898690c6143115817d864efa3909da Mon Sep 17 00:00:00 2001 From: "Marc A. Marti-Renom" Date: Wed, 31 Jul 2024 16:49:11 +0200 Subject: [PATCH 1/4] Added permutations_array to Moran_Local class This allows users to define a custom permutations array for the Moran_Local class to be used in crand. This is useful for custom applications. --- esda/crand.py | 12 ++++++++++-- esda/moran.py | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/esda/crand.py b/esda/crand.py index 523cfbd4..18e389d3 100644 --- a/esda/crand.py +++ b/esda/crand.py @@ -69,6 +69,7 @@ def crand( w, observed, permutations, + permutations_array, keep, n_jobs, stat_func, @@ -91,6 +92,8 @@ def crand( (N,) array with observed values permutations : int Number of permutations for conditional randomisation + permutations_array : ndarray + (permutations, ) array with indices of permuted keep : Boolean If True, store simulation; else do not return randomised statistics n_jobs : int @@ -170,8 +173,13 @@ def crand( # self neighbor, since conditional randomization conditions on site i. cardinalities = np.array((adj_matrix != 0).sum(1)).flatten() max_card = cardinalities.max() - permuted_ids = vec_permutations(max_card, n, permutations, seed) - + if permutations_array is None: + # Random permutation array + permuted_ids = vec_permutations(max_card, n, permutations, seed) + else: + # User defined permutation array + permuted_ids = permutations_array + if n_jobs != 1: try: import joblib # noqa: F401 diff --git a/esda/moran.py b/esda/moran.py index 79a32c6b..3055d437 100644 --- a/esda/moran.py +++ b/esda/moran.py @@ -894,6 +894,8 @@ class Moran_Local: # noqa: N801 permutations : int number of random permutations for calculation of pseudo p_values + permutations_array: array + user specified permutations geoda_quads : boolean (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4 @@ -1027,6 +1029,7 @@ def __init__( w, transformation="r", permutations=PERMUTATIONS, + permutations_array=None, geoda_quads=False, n_jobs=1, keep_simulations=True, @@ -1064,6 +1067,7 @@ def __init__( w, self.Is, permutations, + permutations_array, keep_simulations, n_jobs=n_jobs, stat_func=_moran_local_crand, From 30d2d4822237bffc190b588298c36f7ee625281d Mon Sep 17 00:00:00 2001 From: "Marc A. Marti-Renom" Date: Fri, 2 Aug 2024 16:22:37 +0200 Subject: [PATCH 2/4] =?UTF-8?q?test=20for=20permutations=5Farray=20branch.?= =?UTF-8?q?=C3=A7=20Added=20new=20tests=20for=20the=20permutations=5Farray?= =?UTF-8?q?=20argument=20in=20Moran=5FLocal=20and=20crand.=20Fixed=20some?= =?UTF-8?q?=20test=20that=20were=20broken=20due=20to=20the=20new=20argumen?= =?UTF-8?q?t.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- esda/crand.py | 8 ++++++++ esda/join_counts_local.py | 1 + esda/moran.py | 2 ++ esda/tests/test_moran.py | 25 +++++++++++++++++++++++++ 4 files changed, 36 insertions(+) diff --git a/esda/crand.py b/esda/crand.py index 18e389d3..aa67c480 100644 --- a/esda/crand.py +++ b/esda/crand.py @@ -173,12 +173,20 @@ def crand( # self neighbor, since conditional randomization conditions on site i. cardinalities = np.array((adj_matrix != 0).sum(1)).flatten() max_card = cardinalities.max() + if permutations_array is None: # Random permutation array permuted_ids = vec_permutations(max_card, n, permutations, seed) else: # User defined permutation array permuted_ids = permutations_array + if permuted_ids.shape[0] != permutations: + permutations = permuted_ids.shape[0] + warnings.warn( + f"Number of permutations has been adjusted to match the length of the " + f"permutations array. New value of 'permutations' is {permutations}.", + stacklevel=2, + ) if n_jobs != 1: try: diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py index 86277958..0ede9176 100644 --- a/esda/join_counts_local.py +++ b/esda/join_counts_local.py @@ -138,6 +138,7 @@ def fit(self, y, n_jobs=1, permutations=999): w=self.w, observed=self.LJC, permutations=permutations, + permutations_array=None, keep=keep_simulations, n_jobs=n_jobs, stat_func=_ljc_uni, diff --git a/esda/moran.py b/esda/moran.py index 3055d437..1ae7608c 100644 --- a/esda/moran.py +++ b/esda/moran.py @@ -1410,6 +1410,7 @@ def __init__( w, self.Is, permutations, + None, keep_simulations, n_jobs=n_jobs, stat_func=_moran_local_bv_crand, @@ -1654,6 +1655,7 @@ def __init__( w, transformation=transformation, permutations=permutations, + permutations_array=None, geoda_quads=geoda_quads, n_jobs=n_jobs, keep_simulations=keep_simulations, diff --git a/esda/tests/test_moran.py b/esda/tests/test_moran.py index c35d4009..42a09967 100644 --- a/esda/tests/test_moran.py +++ b/esda/tests/test_moran.py @@ -162,6 +162,7 @@ def setup_method(self): f = libpysal.io.open(libpysal.examples.get_path("desmith.txt")) self.y = np.array(f.by_col["z"]) + @parametrize_desmith def test_Moran_Local(self, w): lm = moran.Moran_Local( @@ -174,6 +175,30 @@ def test_Moran_Local(self, w): ) np.testing.assert_allclose(lm.z_sim[0], -0.6990291160835514) np.testing.assert_allclose(lm.p_z_sim[0], 0.24226691753791396) + + @parametrize_desmith + def test_Moran_Local_custom_perms(self, w): + np.random.seed(SEED) + cardinalities = np.array((w.sparse != 0).sum(1)).flatten() + max_card = cardinalities.max() + original_array = np.arange(len(self.y)) + permutations_array = np.zeros((99, max_card), dtype=np.int32) + + for i in range(99): + random_number = np.random.randint(0, len(self.y) - max_card) + permutations_array[i] = original_array[random_number:random_number + max_card] + + lm = moran.Moran_Local( + self.y, + w, + transformation="r", + permutations=99, + permutations_array=permutations_array, + keep_simulations=True, + seed=SEED, + ) + np.testing.assert_allclose(lm.z_sim[0], -0.49229215590070813) + np.testing.assert_allclose(lm.p_z_sim[0], 0.311256412279757) @parametrize_sac def test_Moran_Local_labels(self, w): From 6e712bb6b012eee25e6ba9ec12cc309b4d825787 Mon Sep 17 00:00:00 2001 From: Levi John Wolf Date: Thu, 15 Aug 2024 10:41:39 +0100 Subject: [PATCH 3/4] move to overloading permutations argument --- esda/crand.py | 32 ++++++++++++++++++++++++-------- esda/join_counts_local.py | 1 - esda/moran.py | 5 ----- esda/tests/test_moran.py | 3 +-- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/esda/crand.py b/esda/crand.py index aa67c480..cba54ce7 100644 --- a/esda/crand.py +++ b/esda/crand.py @@ -69,7 +69,6 @@ def crand( w, observed, permutations, - permutations_array, keep, n_jobs, stat_func, @@ -92,8 +91,6 @@ def crand( (N,) array with observed values permutations : int Number of permutations for conditional randomisation - permutations_array : ndarray - (permutations, ) array with indices of permuted keep : Boolean If True, store simulation; else do not return randomised statistics n_jobs : int @@ -173,13 +170,17 @@ def crand( # self neighbor, since conditional randomization conditions on site i. cardinalities = np.array((adj_matrix != 0).sum(1)).flatten() max_card = cardinalities.max() - - if permutations_array is None: + + if np.ndim(permutations) == 0: # Random permutation array + if int(permutations) != permutations: + raise ValueError( + f"An integer number of permutations is required, but {permutations} was provided." + ) permuted_ids = vec_permutations(max_card, n, permutations, seed) - else: + elif np.ndim(permutations) == 2: # User defined permutation array - permuted_ids = permutations_array + permuted_ids = permutations if permuted_ids.shape[0] != permutations: permutations = permuted_ids.shape[0] warnings.warn( @@ -187,7 +188,22 @@ def crand( f"permutations array. New value of 'permutations' is {permutations}.", stacklevel=2, ) - + if permuted_ids.shape[1] < max_card: + raise ValueError( + f"The `permutations` provided were shape {permuted_ids.shape}" + f", but must be ({permutations, max_card}) in order to supply" + f" enough possible neighbors to shuffle every observation." + f" Consider supplying a wider permutation matrix, with" + f" {max_card-permuted_ids.shape[1]} more columns." + ) + else: + raise ValueError( + f"The `permutations` argument must be either an integer, or a" + f" two-dimensional numpy array of shape (p,k), where p is the" + f" number of permutations to run and k is the largest amount of" + f" shuffled pairs that must be considered at a given site, but" + f" {permutations} was provided." + ) if n_jobs != 1: try: import joblib # noqa: F401 diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py index 0ede9176..86277958 100644 --- a/esda/join_counts_local.py +++ b/esda/join_counts_local.py @@ -138,7 +138,6 @@ def fit(self, y, n_jobs=1, permutations=999): w=self.w, observed=self.LJC, permutations=permutations, - permutations_array=None, keep=keep_simulations, n_jobs=n_jobs, stat_func=_ljc_uni, diff --git a/esda/moran.py b/esda/moran.py index 1ae7608c..62a7e724 100644 --- a/esda/moran.py +++ b/esda/moran.py @@ -894,8 +894,6 @@ class Moran_Local: # noqa: N801 permutations : int number of random permutations for calculation of pseudo p_values - permutations_array: array - user specified permutations geoda_quads : boolean (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4 @@ -1029,7 +1027,6 @@ def __init__( w, transformation="r", permutations=PERMUTATIONS, - permutations_array=None, geoda_quads=False, n_jobs=1, keep_simulations=True, @@ -1067,7 +1064,6 @@ def __init__( w, self.Is, permutations, - permutations_array, keep_simulations, n_jobs=n_jobs, stat_func=_moran_local_crand, @@ -1655,7 +1651,6 @@ def __init__( w, transformation=transformation, permutations=permutations, - permutations_array=None, geoda_quads=geoda_quads, n_jobs=n_jobs, keep_simulations=keep_simulations, diff --git a/esda/tests/test_moran.py b/esda/tests/test_moran.py index 42a09967..f7d65ba7 100644 --- a/esda/tests/test_moran.py +++ b/esda/tests/test_moran.py @@ -192,8 +192,7 @@ def test_Moran_Local_custom_perms(self, w): self.y, w, transformation="r", - permutations=99, - permutations_array=permutations_array, + permutations=permutations_array, keep_simulations=True, seed=SEED, ) From 08a55f53b59219b1eaffb9759efd0b62febdccf2 Mon Sep 17 00:00:00 2001 From: Levi John Wolf Date: Thu, 15 Aug 2024 10:49:19 +0100 Subject: [PATCH 4/4] add checks, add docs to crand-using classes --- esda/crand.py | 11 +++++++++-- esda/geary_local.py | 13 +++++++++---- esda/getisord.py | 12 +++++++++--- esda/join_counts_local.py | 12 +++++++++--- esda/join_counts_local_bv.py | 11 +++++++++-- esda/join_counts_local_mv.py | 11 +++++++++-- esda/moran.py | 36 +++++++++++++++++++++++++++--------- 7 files changed, 81 insertions(+), 25 deletions(-) diff --git a/esda/crand.py b/esda/crand.py index cba54ce7..235dbc3b 100644 --- a/esda/crand.py +++ b/esda/crand.py @@ -89,8 +89,15 @@ def crand( Spatial weights object observed : ndarray (N,) array with observed values - permutations : int - Number of permutations for conditional randomisation + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. keep : Boolean If True, store simulation; else do not return randomised statistics n_jobs : int diff --git a/esda/geary_local.py b/esda/geary_local.py index 584bd55e..7543307a 100644 --- a/esda/geary_local.py +++ b/esda/geary_local.py @@ -42,10 +42,15 @@ def __init__( (default=0.05) Default significance threshold used for creation of labels groups. - permutations : int - (default=999) - number of random permutations for calculation - of pseudo p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. n_jobs : int (default=1) Number of cores to be used in the conditional diff --git a/esda/getisord.py b/esda/getisord.py index 3862468e..a89ed102 100644 --- a/esda/getisord.py +++ b/esda/getisord.py @@ -257,9 +257,15 @@ class G_Local: # noqa: N801 spatial weights instance as W or Graph aligned with y transform : {'R', 'B'} the type of w, either 'B' (binary) or 'R' (row-standardized) - permutations : int - the number of random permutations for calculating - pseudo p values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. star : boolean or float whether or not to include focal observation in sums (default: False) if the row-transformed weight is provided, then this is the default diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py index 86277958..da0d2fab 100644 --- a/esda/join_counts_local.py +++ b/esda/join_counts_local.py @@ -30,9 +30,15 @@ def __init__( ---------- connectivity : W | Graph spatial weights instance as W or Graph aligned with y - permutations : int - number of random permutations for calculation of pseudo - p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. n_jobs : int Number of cores to be used in the conditional randomisation. If -1, all available cores are used. diff --git a/esda/join_counts_local_bv.py b/esda/join_counts_local_bv.py index 69979b8c..88a21996 100644 --- a/esda/join_counts_local_bv.py +++ b/esda/join_counts_local_bv.py @@ -30,8 +30,15 @@ def __init__( ---------- connectivity : W | Graph spatial weights instance as W or Graph aligned with y - permutations : int - number of random permutations for calculation of pseudo p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. n_jobs : int Number of cores to be used in the conditional randomisation. If -1, all available cores are used. diff --git a/esda/join_counts_local_mv.py b/esda/join_counts_local_mv.py index e9e94f25..fab44daf 100644 --- a/esda/join_counts_local_mv.py +++ b/esda/join_counts_local_mv.py @@ -30,8 +30,15 @@ def __init__( ---------- connectivity : W | Graph spatial weights instance as W or Graph aligned with y - permutations : int - number of random permutations for calculation of pseudo p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. n_jobs : int Number of cores to be used in the conditional randomisation. If -1, all available cores are used. diff --git a/esda/moran.py b/esda/moran.py index 62a7e724..84deaa8c 100644 --- a/esda/moran.py +++ b/esda/moran.py @@ -891,9 +891,15 @@ class Moran_Local: # noqa: N801 "D": doubly-standardized, "U": untransformed (general weights), "V": variance-stabilizing. - permutations : int - number of random permutations for calculation of pseudo - p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. geoda_quads : boolean (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4 @@ -1265,9 +1271,15 @@ class Moran_Local_BV: # noqa: N801 "D": doubly-standardized, "U": untransformed (general weights), "V": variance-stabilizing. - permutations : int - number of random permutations for calculation of pseudo - p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. geoda_quads : boolean (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4 @@ -1536,9 +1548,15 @@ class Moran_Local_Rate(Moran_Local): # noqa: N801 "D": doubly-standardized, "U": untransformed (general weights), "V": variance-stabilizing. - permutations : int - number of random permutations for calculation of pseudo - p_values + permutations : int, np.ndarray + Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the + conditional random null hypothesis for each site. Permutations + might be specified as an array of indices if the user needs to add + structure to this conditional permutation null hypothesis. Common + reasons to do this include exchangeability violations, which might + then require us to shuffle observations within (but not between) + groups, or linearity constraints, which may require certain + sequences of observation relationships to be preserved. geoda_quads : boolean (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4