Skip to content
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b7b0653
Add files via upload
Palash123-4 Jan 25, 2024
68a7cfa
Add the multivariate distance covariance
Palash123-4 Jan 25, 2024
af0568d
Update _dcor.py
Palash123-4 Jan 25, 2024
21f4b8a
Merge branch 'vnmabus:develop' into develop
Palash123-4 Jan 31, 2024
d44fb7d
update random projection-
Jan 31, 2024
946397c
update random projection based test of independence
Jan 31, 2024
62152ef
add dist_sum function
Jan 31, 2024
3207181
Merge branch 'develop' of https://github.com/Palash123-4/dcor into de…
Jan 31, 2024
3b6fde1
add citation of random projections based distance covariance
Jan 31, 2024
8890215
update with spaces
Feb 1, 2024
0b73538
update with spaces
Feb 1, 2024
19a71f6
update with spaces
Feb 1, 2024
db98194
insert .
Feb 1, 2024
d1b4050
insert .
Feb 1, 2024
a41bdb8
insert .
Feb 1, 2024
0496e5a
insert .
Feb 1, 2024
0718f7e
update module
Feb 1, 2024
b25130d
update module
Feb 1, 2024
7b55726
update dimension evaluation
Feb 1, 2024
c89131e
update dimension evaluation
Feb 1, 2024
974c84e
Add files via upload
Palash123-4 May 6, 2024
3d25d06
Add files via upload
Palash123-4 Aug 2, 2024
bb32079
Merge branch 'vnmabus:develop' into develop
Palash123-4 Sep 3, 2024
3f3824c
Fix circular import in _rowwise module initialization
Palash123-4 Apr 20, 2026
1a55352
Merge branch 'develop' into develop
Palash123-4 Apr 20, 2026
eaced17
Fix ReadTheDocs build by using importlib.metadata instead of pkg_reso…
Palash123-4 Apr 20, 2026
6b1ae7e
Merge branch 'develop' of https://github.com/Palash123-4/dcor into de…
Palash123-4 Apr 20, 2026
d90b4a3
Fix BibTeX syntax error in refs.bib
Palash123-4 Apr 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions dcor/_dcor.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@
array_namespace,
numpy_namespace,
)
##Additional module for Multivariate dcov test--------------------------------------------------------------
from scipy.special import gammaln
import math

from dcor._rowwise import rowwise
##-------------------------------------------------------------------------------------

Array = TypeVar("Array", bound=ArrayType)

Expand Down Expand Up @@ -1169,3 +1175,131 @@ def distance_correlation_af_inv(
compile_mode=compile_mode,
),
)





"""
A Statistically and Numerically Efficient Independence Test Based on

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring needs to be inside the corresponding function.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I completed the task

Random Projections and Distance Covariance

:cite:`b-dcov_random_projection`.

References
----------
.. bibliography:: ../refs.bib

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use :footcite: and .. footbibliography instead, as it is easier to maintain.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would really appreciate it if you kindly guide me how to do that, I did search this names on the internet, but I am not quite certain how to do it. It would be really nice if you guide me step-by-step process.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just use :footcite: instead of :cite: inside the text to place the footnote, and .. footbibliography:: instead of .. bibliography::. You do not need to specify the .bib file, as it is done in the global config.

You can use this docstring as an example:
https://github.com/GAA-UAM/scikit-fda/blob/3eb8ace81220011559df83be0ffb9154a11fb001/skfda/exploratory/stats/_fisher_rao.py#L66-L99

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I implemented the exact format that you have mentioned

:labelprefix: B
:keyprefix: b-
"""


def gamma_ratio(p):
    """Evaluate the ratio Gamma((p + 1) / 2) / Gamma(p / 2).

    The ratio is obtained by exponentiating the difference of the two
    log-gamma values. It is required to calculate the constants C_p and
    C_q in u_dist_cov_sqr_mv().

    Args:
        p: Dimension of the data.

    Returns:
        The gamma ratio, as a float.

    """
    log_numerator = gammaln((p + 1) / 2)
    log_denominator = gammaln(p / 2)
    return np.exp(log_numerator - log_denominator)



def rndm_projection(X, p):
    """Project the samples in ``X`` onto one random unit direction.

    A direction is drawn by normalising a standard normal vector of
    length ``p`` (taken from NumPy's global random state), and every
    sample is replaced by its scalar product with that direction.

    Args:
        X: Samples, as an array of shape (N, p). For ``p == 1`` both a
            1-D array of length N and a column of shape (N, 1) are
            accepted.
        p: Number of dimensions (p >= 1).

    Returns:
        A 1-D array of length N with the projected samples.

    """
    direction = np.random.standard_normal(p)
    direction = direction / np.linalg.norm(direction)

    X = np.asarray(X)

    if p > 1:
        return direction @ X.T

    # For p == 1 the direction is just +1 or -1. A column of shape (N, 1)
    # must be flattened first: broadcasting it against the direction would
    # return shape (N, 1) instead of the documented length-N array.
    if X.ndim == 2 and X.shape[1] == 1:
        X = X[:, 0]
    return direction * X


def _as_projection_input(data):
    """Return ``data`` as an array plus its dimension, flattening columns."""
    data = np.asarray(data)
    if data.ndim == 1:
        return data, 1
    if data.shape[1] == 1:
        # A single column is a univariate sample; hand it over as 1-D so
        # that its projection is a length-N vector.
        return data[:, 0], 1
    return data, data.shape[1]


def u_dist_cov_sqr_mv(X, Y, n_projs=500, method="mergesort"):
    """Estimate the unbiased squared distance covariance by random projections.

    Both samples are projected ``n_projs`` times onto random directions,
    the univariate unbiased squared distance covariance is evaluated for
    every pair of projections, and the average is rescaled by the
    constants C_p and C_q. The method is the one described in
    :footcite:`dcov_random_projection`.

    Args:
        X: Array of shape (N, p) with N samples of dimension p. A 1-D
            array of length N is treated as p = 1.
        Y: Array of shape (N, q) with N samples of dimension q. A 1-D
            array of length N is treated as q = 1.
        n_projs: Number of random projections. The default is 500 (the
            paper suggests n_projs < N / log(N); larger values provide
            better results).
        method: Fast computation method, either "mergesort" or "avl".
            The default is "mergesort".

    Returns:
        The estimated unbiased squared distance covariance between X and
        Y, as a float.

    Raises:
        ValueError: If X and Y do not have the same number of samples.

    Examples:
        >>> import numpy as np
        >>> import dcor
        >>> from scipy.stats import multivariate_normal
        >>> mean_vector = [2, 3, 5, 3, 2, 1]
        >>> matrix_size = 6
        >>> np.random.seed(123)  # in order to achieve reproducible results
        >>> A = 0.5 * np.random.rand(matrix_size, matrix_size)
        >>> B = np.dot(A, A.transpose())
        >>> n_samples = 3000
        >>> mv = multivariate_normal(mean=mean_vector, cov=B)
        >>> X = mv.rvs(size=n_samples, random_state=123)
        >>> X1 = X.T[:4]
        >>> X2 = X.T[4:]
        >>> omega_bar = u_dist_cov_sqr_mv(X1.T, X2.T)

    References:
        .. footbibliography::

    """
    X, p = _as_projection_input(X)
    Y, q = _as_projection_input(Y)

    n_samples = X.shape[0]
    if Y.shape[0] != n_samples:
        raise ValueError(
            "X and Y must have the same number of samples "
            f"({n_samples} != {Y.shape[0]}).",
        )

    sqrt_pi_value = math.sqrt(math.pi)
    C_p = sqrt_pi_value * gamma_ratio(p)
    C_q = sqrt_pi_value * gamma_ratio(q)

    # One row per random projection, so the univariate estimator can be
    # applied row by row.
    X_proj = np.empty((n_projs, n_samples))
    Y_proj = np.empty((n_projs, n_samples))

    for i in range(n_projs):
        X_proj[i] = rndm_projection(X, p)
        Y_proj[i] = rndm_projection(Y, q)

    # NOTE(review): ``method`` is forwarded as ``rowwise_mode``; confirm
    # against the signature of ``rowwise`` that this is the intended
    # keyword for selecting "mergesort"/"avl".
    omega_ = rowwise(
        u_distance_covariance_sqr,
        X_proj,
        Y_proj,
        rowwise_mode=method,
    )

    return C_p * C_q * np.mean(omega_)
33 changes: 33 additions & 0 deletions dcor/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

from dcor._utils import ArrayType, _sqrt, _transform_to_2d, array_namespace

from numba import njit, prange # Additional repo for dist_sum

from ._utils import _can_be_numpy_double

Array = TypeVar("Array", bound=ArrayType)
Expand Down Expand Up @@ -159,3 +161,34 @@ def pairwise_distances(

x, y = _transform_to_2d(x, y)
return _cdist(x, y, exponent=exponent)





def dist_sum(X):
    """Sum the absolute differences over all distinct pairs of a 1-D sample.

    Computes ``sum(|X[i] - X[j]| for i < j)``. Instead of visiting every
    pair, the values are sorted: the k-th smallest value (0-based) is then
    added in k pairs and subtracted in n - 1 - k pairs, so the sum equals
    ``sum((2 * k - n + 1) * x_sorted[k])``. This needs O(n log n)
    operations instead of O(n^2) and only plain NumPy.

    Args:
        X: One-dimensional array of numbers.

    Returns:
        The sum of the distances between all distinct pairs of elements
        of X, as a float. It is 0.0 when X has fewer than two elements.

    Raises:
        ValueError: If X is not one-dimensional.

    """
    values = np.asarray(X, dtype=np.float64)
    if values.ndim != 1:
        raise ValueError("dist_sum expects a one-dimensional array.")

    n = values.shape[0]
    if n < 2:
        return 0.0

    values = np.sort(values)
    # The weights add up to zero, so shifting the data does not change the
    # result; starting at zero avoids cancellation for large offsets.
    values = values - values[0]
    weights = 2.0 * np.arange(n) - (n - 1)

    return float(weights @ values)




131 changes: 131 additions & 0 deletions dcor/independence.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@
import scipy.stats

from ._dcor import u_distance_correlation_sqr

## Additional modules for Multivariate dcov-based test of independence------------
import math
from ._dcor import u_distance_covariance_sqr, gamma_ratio, rndm_projection
from .distances import dist_sum
from ._rowwise import rowwise
from scipy.special import gammainc
# from mpmath import*




from ._dcor_internals import (
_check_same_n_elements,
_distance_matrix_generic,
Expand Down Expand Up @@ -347,3 +359,122 @@ def distance_correlation_t_test(
p_value = 1 - scipy.stats.t.cdf(t_test, df=df)

return HypothesisTest(pvalue=p_value, statistic=t_test)

#-----------------------------------------------------------------------------------------------------------------

"""
A Statistically and Numerically Efficient Independence Test Based on Random Projections and Distance Covariance

:cite:`b-dcov_random_projection`.

References
----------
.. bibliography:: ../refs.bib
:labelprefix: B
:keyprefix: b-
"""


def gamma_cdf(x, shape, scale):
    """Evaluate the cumulative distribution function of a gamma distribution.

    Args:
        x: Point at which the distribution function is evaluated (scalar).
        shape: Shape parameter of the gamma distribution.
        scale: Scale parameter of the gamma distribution.

    Returns:
        The regularized lower incomplete gamma function evaluated at
        ``x / scale``, with negative arguments treated as 0.

    """
    # gammainc is only defined for non-negative arguments and yields NaN
    # below zero; the gamma distribution has no mass there, so clamp.
    standardized = max(float(x / scale), 0.0)
    return gammainc(shape, standardized)

def _samples_and_dimension(data):
    """Return ``data`` as an array plus its dimension, flattening columns."""
    data = np.asarray(data)
    if data.ndim == 1:
        return data, 1
    if data.shape[1] == 1:
        # A single column is a univariate sample; hand it over as 1-D so
        # that its projection is a length-N vector.
        return data[:, 0], 1
    return data, data.shape[1]


def u_dist_cov_sqr_mv_test(X, Y, n_projs=500, method="mergesort"):
    """Test independence of X and Y with randomly projected distance covariance.

    The statistic ``n_samples * omega1_bar + S2_bar * S3_bar`` is compared
    with a gamma distribution whose shape ``alpha`` and rate ``beta`` are
    estimated from the projected samples. The test is the one described
    in :footcite:`dcov_random_projection`.

    Args:
        X: Array of shape (N, p) with N samples of dimension p. A 1-D
            array of length N is treated as p = 1.
        Y: Array of shape (N, q) with N samples of dimension q. A 1-D
            array of length N is treated as q = 1.
        n_projs: Number of random projections. The default is 500 (the
            paper suggests n_projs < N / log(N); larger values provide
            better results).
        method: Fast computation method, either "mergesort" or "avl".
            The default is "mergesort".

    Returns:
        Results of the hypothesis test.

    Raises:
        ValueError: If X and Y do not have the same number of samples.

    Examples:
        >>> import numpy as np
        >>> import dcor
        >>> from scipy.stats import multivariate_normal
        >>> mean_vector = [2, 3, 5, 3, 2, 1]
        >>> matrix_size = 6
        >>> A = 0.5 * np.random.rand(matrix_size, matrix_size)
        >>> B = np.dot(A, A.transpose())
        >>> n_samples = 3000
        >>> X = multivariate_normal.rvs(mean_vector, B, size=n_samples)
        >>> X1 = X.T[:4]
        >>> X2 = X.T[4:]
        >>> result = u_dist_cov_sqr_mv_test(X1.T, X2.T)

    References:
        .. footbibliography::

    """
    # Imported here so the block does not rely on a new module-level import.
    from scipy.special import gammaincc

    X, p = _samples_and_dimension(X)
    Y, q = _samples_and_dimension(Y)

    n_samples = X.shape[0]
    if Y.shape[0] != n_samples:
        raise ValueError(
            "X and Y must have the same number of samples "
            f"({n_samples} != {Y.shape[0]}).",
        )

    sqrt_pi_value = math.sqrt(math.pi)
    C_p = sqrt_pi_value * gamma_ratio(p)
    C_q = sqrt_pi_value * gamma_ratio(q)

    # Two independent sets of projections per variable: the first set
    # feeds every term, the second one is only paired with the first to
    # obtain omega2_bar and omega3_bar.
    X_proj_1 = np.empty((n_projs, n_samples))
    Y_proj_1 = np.empty((n_projs, n_samples))
    X_proj_2 = np.empty((n_projs, n_samples))
    Y_proj_2 = np.empty((n_projs, n_samples))
    S2_n = 0.0
    S3_n = 0.0

    # The order of the random draws inside the loop is kept as X, Y, X, Y.
    for i in range(n_projs):
        X_proj_1[i] = rndm_projection(X, p)
        Y_proj_1[i] = rndm_projection(Y, q)
        # dist_sum only covers the pairs i < j; doubling gives the sum
        # over all ordered pairs.
        S2_n += 2 * dist_sum(X_proj_1[i])
        S3_n += 2 * dist_sum(Y_proj_1[i])
        X_proj_2[i] = rndm_projection(X, p)
        Y_proj_2[i] = rndm_projection(Y, q)

    # NOTE(review): ``method`` is forwarded as ``rowwise_mode``; confirm
    # against the signature of ``rowwise`` that this is the intended
    # keyword for selecting "mergesort"/"avl".
    def _rowwise_dcov(first, second):
        return np.asarray(
            rowwise(
                u_distance_covariance_sqr,
                first,
                second,
                rowwise_mode=method,
            ),
        )

    omega1_bar = C_p * C_q * np.mean(_rowwise_dcov(X_proj_1, Y_proj_1))

    # NOTE(review): the reference paper appears to scale this term by
    # C_p ** 2 * C_q ** 2; the constant is left as submitted - confirm.
    S11_ = _rowwise_dcov(X_proj_1, X_proj_1)
    S12_ = _rowwise_dcov(Y_proj_1, Y_proj_1)
    S1_bar = C_p * C_q * np.mean(S11_ * S12_)

    n_pairs = n_projs * n_samples * (n_samples - 1)
    S2_bar = (C_p * S2_n) / n_pairs
    S3_bar = (C_q * S3_n) / n_pairs

    omega2_bar = (C_p ** 2) * np.mean(_rowwise_dcov(X_proj_1, X_proj_2))
    omega3_bar = (C_q ** 2) * np.mean(_rowwise_dcov(Y_proj_1, Y_proj_2))

    # Shape (alpha) and rate (beta) of the reference gamma distribution.
    denom = (((n_projs - 1) * omega2_bar * omega3_bar) + S1_bar) / n_projs
    alpha = (0.5 * ((S2_bar * S3_bar) ** 2)) / denom
    beta = (0.5 * S2_bar * S3_bar) / denom

    Test_statistic = (n_samples * omega1_bar) + (S2_bar * S3_bar)

    # Upper tail of the gamma distribution. The survival function is
    # evaluated directly rather than as 1 - cdf, which loses small
    # p-values to rounding and could even come out slightly negative.
    # The incomplete gamma functions are undefined for negative
    # arguments, hence the clamp.
    standardized = max(float(Test_statistic * beta), 0.0)
    p_val = gammaincc(alpha, standardized)

    return HypothesisTest(pvalue=p_val, statistic=Test_statistic)

13 changes: 13 additions & 0 deletions docs/refs.bib
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@

% Random projections based distance covariance
@article{dcov_random_projection,
title={A Statistically and Numerically Efficient Independence Test Based on Random Projections and Distance Covariance},
author={Huang, Cheng and Huo, Xiaoming},
journal={Frontiers in Applied Mathematics and Statistics},
volume={7},
pages={779841},
year={2022},
url = {https://www.frontiersin.org/articles/10.3389/fams.2021.779841/full},
publisher={Frontiers Media SA}
}

% Energy distance
@article{energy_distance,
title = "Energy statistics: A class of statistics based on distances",
Expand Down