diff --git a/great_expectations/expectations/core/expect_column_bootstrapped_ks_test_p_value_to_be_greater_than.py b/great_expectations/expectations/core/expect_column_bootstrapped_ks_test_p_value_to_be_greater_than.py deleted file mode 100644 index 3bce0589a221..000000000000 --- a/great_expectations/expectations/core/expect_column_bootstrapped_ks_test_p_value_to_be_greater_than.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import annotations - -from great_expectations.expectations.expectation import ( - BatchExpectation, -) - - -# NOTE: This Expectation is incomplete and not ready for use. -# It should remain unexported until it meets the requirements set by our V1 API. -class ExpectColumnBootstrappedKsTestPValueToBeGreaterThan(BatchExpectation): - def __init__(self, *args, **kwargs): - raise NotImplementedError - - library_metadata = { - "maturity": "production", - "package": "great_expectations", - "tags": [ - "core expectation", - "column aggregate expectation", - "needs migration to modular expectations api", - ], - "contributors": ["@great_expectations"], - "requirements": [], - } - - metric_dependencies = tuple() - success_keys = () - args_keys = ( - "column", - "distribution", - "p_value", - "params", - ) diff --git a/great_expectations/expectations/core/expect_column_chisquare_test_p_value_to_be_greater_than.py b/great_expectations/expectations/core/expect_column_chisquare_test_p_value_to_be_greater_than.py deleted file mode 100644 index 6153149edd1c..000000000000 --- a/great_expectations/expectations/core/expect_column_chisquare_test_p_value_to_be_greater_than.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import annotations - -from great_expectations.expectations.expectation import ( - BatchExpectation, -) - - -# NOTE: This Expectation is incomplete and not ready for use. -# It should remain unexported until it meets the requirements set by our V1 API. -class ExpectColumnChiSquareTestPValueToBeGreaterThan(BatchExpectation): - def __init__(self, *args, **kwargs): - raise NotImplementedError - - library_metadata = { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation", - "needs migration to modular expectations api", - ], - "contributors": ["@great_expectations"], - "requirements": [], - } - - metric_dependencies = tuple() - success_keys = () - args_keys = ( - "column", - "partition_object", - "p", - "tail_weight_holdout", - ) diff --git a/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py b/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py deleted file mode 100644 index bb004030a35f..000000000000 --- a/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py +++ /dev/null @@ -1,169 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Optional - -from great_expectations.expectations.expectation import ( - BatchExpectation, - render_suite_parameter_string, -) -from great_expectations.render import ( - LegacyDiagnosticRendererType, - LegacyRendererType, - RenderedStringTemplateContent, - RenderedTableContent, -) -from great_expectations.render.renderer.renderer import renderer -from great_expectations.render.renderer_configuration import ( - RendererConfiguration, - RendererValueType, -) -from great_expectations.render.util import num_to_str, substitute_none_for_missing - -if TYPE_CHECKING: - from great_expectations.core import ( - ExpectationValidationResult, - ) - from great_expectations.expectations.expectation_configuration import ( - ExpectationConfiguration, - ) - from great_expectations.render.renderer_configuration import AddParamArgs - - -# NOTE: This Expectation is incomplete and not ready for use. -# It should remain unexported until it meets the requirements set by our V1 API. -class ExpectColumnPairCramersPhiValueToBeLessThan(BatchExpectation): - def __init__(self, *args, **kwargs): - raise NotImplementedError - - library_metadata = { - "maturity": "production", - "tags": [ - "core expectation", - "multi-column expectation", - "needs migration to modular expectations api", - ], - "contributors": ["@great_expectations"], - "requirements": [], - } - - metric_dependencies = tuple() - success_keys = ( - "column_A", - "column_B", - "threshold", - ) - # default_kwarg_values = { - # "column_A": None, - # "column_B": None, - # "bins_A": None, - # "bins_B": None, - # "n_bins_A": None, - # "n_bins_B": None, - # "threshold": 0.1, - # "result_format": "BASIC", - # "catch_exceptions": False, - # } - args_keys = ( - "column_A", - "column_B", - ) - - @classmethod - def _prescriptive_template( - cls, - renderer_configuration: RendererConfiguration, - ) -> RendererConfiguration: - add_param_args: AddParamArgs = ( - ("column_A", RendererValueType.STRING), - ("column_B", RendererValueType.STRING), - ) - for name, param_type in add_param_args: - renderer_configuration.add_param(name=name, param_type=param_type) - - params = renderer_configuration.params - - if not params.column_A or not params.column_B: - renderer_configuration.template_str = " unrecognized kwargs for expect_column_pair_cramers_phi_value_to_be_less_than: missing column." # noqa: E501 # FIXME CoP - else: - renderer_configuration.template_str = ( - "Values in $column_A and $column_B must be independent." - ) - - return renderer_configuration - - @classmethod - @renderer(renderer_type=LegacyRendererType.PRESCRIPTIVE) - @render_suite_parameter_string - def _prescriptive_renderer( - cls, - configuration: Optional[ExpectationConfiguration] = None, - result: Optional[ExpectationValidationResult] = None, - runtime_configuration: Optional[dict] = None, - **kwargs, - ): - runtime_configuration = runtime_configuration or {} - _ = runtime_configuration.get("include_column_name") is not False - styling = runtime_configuration.get("styling") - params = substitute_none_for_missing(configuration.kwargs, ["column_A", "column_B"]) - if (params["column_A"] is None) or (params["column_B"] is None): - template_str = " unrecognized kwargs for expect_column_pair_cramers_phi_value_to_be_less_than: missing column." # noqa: E501 # FIXME CoP - else: - template_str = "Values in $column_A and $column_B must be independent." - - rendered_string_template_content = RenderedStringTemplateContent( - **{ - "content_block_type": "string_template", - "string_template": { - "template": template_str, - "params": params, - "styling": styling, - }, - } - ) - - return [rendered_string_template_content] - - @classmethod - @renderer(renderer_type=LegacyDiagnosticRendererType.OBSERVED_VALUE) - def _diagnostic_observed_value_renderer( - cls, - configuration: Optional[ExpectationConfiguration] = None, - result: Optional[ExpectationValidationResult] = None, - runtime_configuration: Optional[dict] = None, - **kwargs, - ): - observed_value = result.result.get("observed_value") - column_A = result.expectation_config.kwargs["column_A"] - column_B = result.expectation_config.kwargs["column_B"] - crosstab = result.result.get("details", {}).get("crosstab") - - if observed_value is not None: - observed_value = num_to_str(observed_value, precision=3, use_locale=True) - if crosstab is not None: - table = [[""] + list(crosstab.columns)] - for col in range(len(crosstab)): - table.append([crosstab.index[col]] + list(crosstab.iloc[col, :])) - - return RenderedTableContent( - **{ - "content_block_type": "table", - "header": f"Observed cramers phi of {observed_value}. \n" - f"Crosstab between {column_A} (rows) and {column_B} (columns):", - "table": table, - "styling": { - "body": { - "classes": [ - "table", - "table-sm", - "table-unbordered", - "col-4", - "mt-2", - ], - } - }, - } - ) - else: - return observed_value - else: - return "--" diff --git a/great_expectations/expectations/core/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.py b/great_expectations/expectations/core/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.py deleted file mode 100644 index dcf85420f954..000000000000 --- a/great_expectations/expectations/core/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.py +++ /dev/null @@ -1,27 +0,0 @@ -from __future__ import annotations - -from great_expectations.expectations.expectation import ( - BatchExpectation, -) - - -# NOTE: This Expectation is incomplete and not ready for use. -# It should remain unexported until it meets the requirements set by our V1 API. -class ExpectColumnParameterizedDistributionKsTestPValueToBeGreaterThan(BatchExpectation): - def __init__(self, *args, **kwargs): - raise NotImplementedError - - library_metadata = { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation", - "needs migration to modular expectations api", - ], - "contributors": ["@great_expectations"], - "requirements": [], - } - - metric_dependencies = tuple() - success_keys = () - args_keys = () diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/__init__.py b/great_expectations/expectations/metrics/column_aggregate_metrics/__init__.py index 6cfc1032ba38..7f615ec995ca 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/__init__.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/__init__.py @@ -19,9 +19,6 @@ from .column_min import ColumnMin from .column_most_common_value import ColumnMostCommonValue from .column_non_null_count import ColumnNonNullCount -from .column_parameterized_distribution_ks_test_p_value import ( - ColumnParameterizedDistributionKSTestPValue, -) from .column_partition import ColumnPartition from .column_proportion_of_non_null_values import ColumnNonNullProportion from .column_proportion_of_unique_values import ColumnUniqueProportion diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py deleted file mode 100644 index ad4626af6d34..000000000000 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Final - -from great_expectations.execution_engine import PandasExecutionEngine -from great_expectations.expectations.metrics.column_aggregate_metric_provider import ( - ColumnAggregateMetricProvider, - column_aggregate_value, -) -from great_expectations.expectations.metrics.util import ( - is_valid_continuous_partition_object, -) - -logger = logging.getLogger(__name__) - - -import numpy as np -from scipy import stats - -NP_RANDOM_GENERATOR: Final = np.random.default_rng() - - -class ColumnBootstrappedKSTestPValue(ColumnAggregateMetricProvider): - """MetricProvider Class for Aggregate Standard Deviation metric""" - - metric_name = "column.bootstrapped_ks_test_p_value" - value_keys = ("partition_object", "p", "bootstrap_sample", "bootstrap_sample_size") - - @column_aggregate_value(engine=PandasExecutionEngine) - def _pandas( # noqa: C901 # FIXME CoP - cls, - column, - partition_object=None, - p=0.05, - bootstrap_samples=None, - bootstrap_sample_size=None, - **kwargs, - ): - if not is_valid_continuous_partition_object(partition_object): - raise ValueError("Invalid continuous partition object.") # noqa: TRY003 # FIXME CoP - - # TODO: consider changing this into a check that tail_weights does not exist exclusively, by moving this check into is_valid_continuous_partition_object # noqa: E501 # FIXME CoP - if (partition_object["bins"][0] == -np.inf) or (partition_object["bins"][-1] == np.inf): - raise ValueError("Partition endpoints must be finite.") # noqa: TRY003 # FIXME CoP - - if "tail_weights" in partition_object and np.sum(partition_object["tail_weights"]) > 0: - raise ValueError("Partition cannot have tail weights -- endpoints must be finite.") # noqa: TRY003 # FIXME CoP - - test_cdf = np.append(np.array([0]), np.cumsum(partition_object["weights"])) - - def estimated_cdf(x): - return np.interp(x, partition_object["bins"], test_cdf) - - if bootstrap_samples is None: - bootstrap_samples = 1000 - - if bootstrap_sample_size is None: - # Sampling too many elements (or not bootstrapping) will make the test too sensitive to the fact that we've # noqa: E501 # FIXME CoP - # compressed via a partition. - - # Sampling too few elements will make the test insensitive to significant differences, especially # noqa: E501 # FIXME CoP - # for nonoverlapping ranges. - bootstrap_sample_size = len(partition_object["weights"]) * 2 - - results = [ - stats.kstest( - NP_RANDOM_GENERATOR.choice(column, size=bootstrap_sample_size), - estimated_cdf, - )[1] - for _ in range(bootstrap_samples) - ] - - test_result = (1 + sum(x >= p for x in results)) / (bootstrap_samples + 1) - - hist, _bin_edges = np.histogram(column, partition_object["bins"]) - below_partition = len(np.where(column < partition_object["bins"][0])[0]) - above_partition = len(np.where(column > partition_object["bins"][-1])[0]) - - # Expand observed partition to report, if necessary - if below_partition > 0 and above_partition > 0: - observed_bins = [np.min(column)] + partition_object["bins"] + [np.max(column)] - observed_weights = np.concatenate(([below_partition], hist, [above_partition])) / len( - column - ) - elif below_partition > 0: - observed_bins = [np.min(column)] + partition_object["bins"] - observed_weights = np.concatenate(([below_partition], hist)) / len(column) - elif above_partition > 0: - observed_bins = partition_object["bins"] + [np.max(column)] - observed_weights = np.concatenate((hist, [above_partition])) / len(column) - else: - observed_bins = partition_object["bins"] - observed_weights = hist / len(column) - - observed_cdf_values = np.cumsum(observed_weights) - - # TODO: How should this metric's return_obj be structured? - return_obj = { - "observed_value": test_result, - "details": { - "bootstrap_samples": bootstrap_samples, - "bootstrap_sample_size": bootstrap_sample_size, - "observed_partition": { - "bins": observed_bins, - "weights": observed_weights.tolist(), - }, - "expected_partition": { - "bins": partition_object["bins"], - "weights": partition_object["weights"], - }, - "observed_cdf": { - "x": observed_bins, - "cdf_values": [0] + observed_cdf_values.tolist(), - }, - "expected_cdf": { - "x": partition_object["bins"], - "cdf_values": test_cdf.tolist(), - }, - }, - } - - return return_obj diff --git a/great_expectations/self_check/util.py b/great_expectations/self_check/util.py index 34356966cc3d..dbe82b783020 100644 --- a/great_expectations/self_check/util.py +++ b/great_expectations/self_check/util.py @@ -1136,14 +1136,11 @@ def candidate_test_is_on_temporary_notimplemented_list_v2_api(context, expectati "expect_column_values_to_match_json_schema", "expect_column_stdev_to_be_between", "expect_column_most_common_value_to_be_in_set", - "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", - "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", "expect_column_pair_values_to_be_equal", "expect_column_pair_values_a_to_be_greater_than_b", "expect_select_column_values_to_be_unique_within_record", "expect_compound_columns_to_be_unique", "expect_multicolumn_values_to_be_unique", - "expect_column_pair_cramers_phi_value_to_be_less_than", "expect_multicolumn_sum_to_equal", "expect_column_value_z_scores_to_be_less_than", ] @@ -1155,9 +1152,6 @@ def candidate_test_is_on_temporary_notimplemented_list_v2_api(context, expectati expectations_not_implemented_v2_sql.append( "expect_column_kl_divergence_to_be_less_than" ) # TODO: unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 - expectations_not_implemented_v2_sql.append( - "expect_column_chisquare_test_p_value_to_be_greater_than" - ) # TODO: unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 expectations_not_implemented_v2_sql.append( "expect_column_values_to_be_between" ) # TODO: error unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 @@ -1182,10 +1176,7 @@ def candidate_test_is_on_temporary_notimplemented_list_v2_api(context, expectati return expectation_type in [ "expect_column_values_to_be_dateutil_parseable", "expect_column_values_to_be_json_parseable", - "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", - "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", "expect_compound_columns_to_be_unique", - "expect_column_pair_cramers_phi_value_to_be_less_than", "expect_table_row_count_to_equal_other_table", "expect_column_value_z_scores_to_be_less_than", ]