From b46022b4f0006169800872b4afa49cb3a762dc56 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 2 Apr 2026 20:05:03 +0200 Subject: [PATCH 1/3] [MAINTENANCE] Remove dead code: statistical expectations and schemas Removes dead statistical expectations (bootstrapped KS, chi-square, Cramers phi, parameterized distribution KS), their metric, and the unused core/schemas/ directory. Cleans up string references in self_check/util.py. --- ...pped_ks_test_p_value_to_be_greater_than.py | 33 -- ...isquare_test_p_value_to_be_greater_than.py | 32 -- ..._pair_cramers_phi_value_to_be_less_than.py | 169 ------ ...tion_ks_test_p_value_to_be_greater_than.py | 27 - .../ExpectColumnDistinctValuesToBeInSet.json | 503 ----------------- ...xpectColumnDistinctValuesToContainSet.json | 503 ----------------- .../ExpectColumnDistinctValuesToEqualSet.json | 503 ----------------- .../ExpectColumnKLDivergenceToBeLessThan.json | 531 ------------------ .../schemas/ExpectColumnMaxToBeBetween.json | 497 ---------------- .../schemas/ExpectColumnMeanToBeBetween.json | 497 ---------------- .../ExpectColumnMedianToBeBetween.json | 497 ---------------- .../schemas/ExpectColumnMinToBeBetween.json | 497 ---------------- .../ExpectColumnMostCommonValueToBeInSet.json | 515 ----------------- ...pectColumnPairValuesAToBeGreaterThanB.json | 483 ---------------- .../ExpectColumnPairValuesToBeEqual.json | 472 ---------------- .../ExpectColumnPairValuesToBeInSet.json | 491 ---------------- ...nProportionOfNonNullValuesToBeBetween.json | 484 ---------------- ...mnProportionOfUniqueValuesToBeBetween.json | 498 ---------------- ...ExpectColumnQuantileValuesToBeBetween.json | 493 ---------------- .../schemas/ExpectColumnStdevToBeBetween.json | 481 ---------------- .../schemas/ExpectColumnSumToBeBetween.json | 497 ---------------- .../core/schemas/ExpectColumnToExist.json | 248 -------- ...pectColumnUniqueValueCountToBeBetween.json | 498 ---------------- 
.../ExpectColumnValueLengthsToBeBetween.json | 497 ---------------- .../ExpectColumnValueLengthsToEqual.json | 460 --------------- .../ExpectColumnValueZScoresToBeLessThan.json | 473 ---------------- .../ExpectColumnValuesToBeBetween.json | 513 ----------------- .../schemas/ExpectColumnValuesToBeInSet.json | 519 ----------------- .../ExpectColumnValuesToBeInTypeList.json | 458 --------------- .../schemas/ExpectColumnValuesToBeNull.json | 447 --------------- .../schemas/ExpectColumnValuesToBeOfType.json | 460 --------------- .../schemas/ExpectColumnValuesToBeUnique.json | 447 --------------- .../ExpectColumnValuesToMatchLikePattern.json | 458 --------------- ...ectColumnValuesToMatchLikePatternList.json | 478 ---------------- .../ExpectColumnValuesToMatchRegex.json | 458 --------------- .../ExpectColumnValuesToMatchRegexList.json | 478 ---------------- .../ExpectColumnValuesToNotBeInSet.json | 519 ----------------- .../ExpectColumnValuesToNotBeNull.json | 447 --------------- ...pectColumnValuesToNotMatchLikePattern.json | 457 --------------- ...ColumnValuesToNotMatchLikePatternList.json | 460 --------------- .../ExpectColumnValuesToNotMatchRegex.json | 457 --------------- ...ExpectColumnValuesToNotMatchRegexList.json | 460 --------------- .../ExpectCompoundColumnsToBeUnique.json | 459 --------------- .../schemas/ExpectMulticolumnSumToEqual.json | 480 ---------------- .../ExpectQueryResultsToMatchComparison.json | 262 --------- ...ectColumnValuesToBeUniqueWithinRecord.json | 462 --------------- .../ExpectTableColumnCountToBeBetween.json | 267 --------- .../ExpectTableColumnCountToEqual.json | 242 -------- .../ExpectTableColumnsToMatchOrderedList.json | 245 -------- .../schemas/ExpectTableColumnsToMatchSet.json | 258 --------- .../ExpectTableRowCountToBeBetween.json | 480 ---------------- .../schemas/ExpectTableRowCountToEqual.json | 437 -------------- .../ExpectTableRowCountToEqualOtherTable.json | 435 -------------- .../expectations/core/schemas/README.md | 9 - 
.../schemas/UnexpectedRowsExpectation.json | 223 -------- .../expectations/core/schemas/__init__.py | 0 .../column_aggregate_metrics/__init__.py | 3 - .../column_bootstrapped_ks_test_p_value.py | 123 ---- great_expectations/self_check/util.py | 9 - 59 files changed, 22789 deletions(-) delete mode 100644 great_expectations/expectations/core/expect_column_bootstrapped_ks_test_p_value_to_be_greater_than.py delete mode 100644 great_expectations/expectations/core/expect_column_chisquare_test_p_value_to_be_greater_than.py delete mode 100644 great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py delete mode 100644 great_expectations/expectations/core/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.py delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json 
delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnProportionOfNonNullValuesToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnToExist.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json delete mode 100644 
great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectQueryResultsToMatchComparison.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json delete mode 100644 great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json delete mode 100644 great_expectations/expectations/core/schemas/README.md delete mode 100644 
great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json delete mode 100644 great_expectations/expectations/core/schemas/__init__.py delete mode 100644 great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py diff --git a/great_expectations/expectations/core/expect_column_bootstrapped_ks_test_p_value_to_be_greater_than.py b/great_expectations/expectations/core/expect_column_bootstrapped_ks_test_p_value_to_be_greater_than.py deleted file mode 100644 index 3bce0589a221..000000000000 --- a/great_expectations/expectations/core/expect_column_bootstrapped_ks_test_p_value_to_be_greater_than.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import annotations - -from great_expectations.expectations.expectation import ( - BatchExpectation, -) - - -# NOTE: This Expectation is incomplete and not ready for use. -# It should remain unexported until it meets the requirements set by our V1 API. -class ExpectColumnBootstrappedKsTestPValueToBeGreaterThan(BatchExpectation): - def __init__(self, *args, **kwargs): - raise NotImplementedError - - library_metadata = { - "maturity": "production", - "package": "great_expectations", - "tags": [ - "core expectation", - "column aggregate expectation", - "needs migration to modular expectations api", - ], - "contributors": ["@great_expectations"], - "requirements": [], - } - - metric_dependencies = tuple() - success_keys = () - args_keys = ( - "column", - "distribution", - "p_value", - "params", - ) diff --git a/great_expectations/expectations/core/expect_column_chisquare_test_p_value_to_be_greater_than.py b/great_expectations/expectations/core/expect_column_chisquare_test_p_value_to_be_greater_than.py deleted file mode 100644 index 6153149edd1c..000000000000 --- a/great_expectations/expectations/core/expect_column_chisquare_test_p_value_to_be_greater_than.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import annotations - -from great_expectations.expectations.expectation 
import ( - BatchExpectation, -) - - -# NOTE: This Expectation is incomplete and not ready for use. -# It should remain unexported until it meets the requirements set by our V1 API. -class ExpectColumnChiSquareTestPValueToBeGreaterThan(BatchExpectation): - def __init__(self, *args, **kwargs): - raise NotImplementedError - - library_metadata = { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation", - "needs migration to modular expectations api", - ], - "contributors": ["@great_expectations"], - "requirements": [], - } - - metric_dependencies = tuple() - success_keys = () - args_keys = ( - "column", - "partition_object", - "p", - "tail_weight_holdout", - ) diff --git a/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py b/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py deleted file mode 100644 index bb004030a35f..000000000000 --- a/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py +++ /dev/null @@ -1,169 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Optional - -from great_expectations.expectations.expectation import ( - BatchExpectation, - render_suite_parameter_string, -) -from great_expectations.render import ( - LegacyDiagnosticRendererType, - LegacyRendererType, - RenderedStringTemplateContent, - RenderedTableContent, -) -from great_expectations.render.renderer.renderer import renderer -from great_expectations.render.renderer_configuration import ( - RendererConfiguration, - RendererValueType, -) -from great_expectations.render.util import num_to_str, substitute_none_for_missing - -if TYPE_CHECKING: - from great_expectations.core import ( - ExpectationValidationResult, - ) - from great_expectations.expectations.expectation_configuration import ( - ExpectationConfiguration, - ) - from great_expectations.render.renderer_configuration import AddParamArgs - - -# 
NOTE: This Expectation is incomplete and not ready for use. -# It should remain unexported until it meets the requirements set by our V1 API. -class ExpectColumnPairCramersPhiValueToBeLessThan(BatchExpectation): - def __init__(self, *args, **kwargs): - raise NotImplementedError - - library_metadata = { - "maturity": "production", - "tags": [ - "core expectation", - "multi-column expectation", - "needs migration to modular expectations api", - ], - "contributors": ["@great_expectations"], - "requirements": [], - } - - metric_dependencies = tuple() - success_keys = ( - "column_A", - "column_B", - "threshold", - ) - # default_kwarg_values = { - # "column_A": None, - # "column_B": None, - # "bins_A": None, - # "bins_B": None, - # "n_bins_A": None, - # "n_bins_B": None, - # "threshold": 0.1, - # "result_format": "BASIC", - # "catch_exceptions": False, - # } - args_keys = ( - "column_A", - "column_B", - ) - - @classmethod - def _prescriptive_template( - cls, - renderer_configuration: RendererConfiguration, - ) -> RendererConfiguration: - add_param_args: AddParamArgs = ( - ("column_A", RendererValueType.STRING), - ("column_B", RendererValueType.STRING), - ) - for name, param_type in add_param_args: - renderer_configuration.add_param(name=name, param_type=param_type) - - params = renderer_configuration.params - - if not params.column_A or not params.column_B: - renderer_configuration.template_str = " unrecognized kwargs for expect_column_pair_cramers_phi_value_to_be_less_than: missing column." # noqa: E501 # FIXME CoP - else: - renderer_configuration.template_str = ( - "Values in $column_A and $column_B must be independent." 
- ) - - return renderer_configuration - - @classmethod - @renderer(renderer_type=LegacyRendererType.PRESCRIPTIVE) - @render_suite_parameter_string - def _prescriptive_renderer( - cls, - configuration: Optional[ExpectationConfiguration] = None, - result: Optional[ExpectationValidationResult] = None, - runtime_configuration: Optional[dict] = None, - **kwargs, - ): - runtime_configuration = runtime_configuration or {} - _ = runtime_configuration.get("include_column_name") is not False - styling = runtime_configuration.get("styling") - params = substitute_none_for_missing(configuration.kwargs, ["column_A", "column_B"]) - if (params["column_A"] is None) or (params["column_B"] is None): - template_str = " unrecognized kwargs for expect_column_pair_cramers_phi_value_to_be_less_than: missing column." # noqa: E501 # FIXME CoP - else: - template_str = "Values in $column_A and $column_B must be independent." - - rendered_string_template_content = RenderedStringTemplateContent( - **{ - "content_block_type": "string_template", - "string_template": { - "template": template_str, - "params": params, - "styling": styling, - }, - } - ) - - return [rendered_string_template_content] - - @classmethod - @renderer(renderer_type=LegacyDiagnosticRendererType.OBSERVED_VALUE) - def _diagnostic_observed_value_renderer( - cls, - configuration: Optional[ExpectationConfiguration] = None, - result: Optional[ExpectationValidationResult] = None, - runtime_configuration: Optional[dict] = None, - **kwargs, - ): - observed_value = result.result.get("observed_value") - column_A = result.expectation_config.kwargs["column_A"] - column_B = result.expectation_config.kwargs["column_B"] - crosstab = result.result.get("details", {}).get("crosstab") - - if observed_value is not None: - observed_value = num_to_str(observed_value, precision=3, use_locale=True) - if crosstab is not None: - table = [[""] + list(crosstab.columns)] - for col in range(len(crosstab)): - table.append([crosstab.index[col]] + 
list(crosstab.iloc[col, :])) - - return RenderedTableContent( - **{ - "content_block_type": "table", - "header": f"Observed cramers phi of {observed_value}. \n" - f"Crosstab between {column_A} (rows) and {column_B} (columns):", - "table": table, - "styling": { - "body": { - "classes": [ - "table", - "table-sm", - "table-unbordered", - "col-4", - "mt-2", - ], - } - }, - } - ) - else: - return observed_value - else: - return "--" diff --git a/great_expectations/expectations/core/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.py b/great_expectations/expectations/core/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.py deleted file mode 100644 index dcf85420f954..000000000000 --- a/great_expectations/expectations/core/expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than.py +++ /dev/null @@ -1,27 +0,0 @@ -from __future__ import annotations - -from great_expectations.expectations.expectation import ( - BatchExpectation, -) - - -# NOTE: This Expectation is incomplete and not ready for use. -# It should remain unexported until it meets the requirements set by our V1 API. 
-class ExpectColumnParameterizedDistributionKsTestPValueToBeGreaterThan(BatchExpectation): - def __init__(self, *args, **kwargs): - raise NotImplementedError - - library_metadata = { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation", - "needs migration to modular expectations api", - ], - "contributors": ["@great_expectations"], - "requirements": [], - } - - metric_dependencies = tuple() - success_keys = () - args_keys = () diff --git a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json deleted file mode 100644 index bd16a5da3e8b..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json +++ /dev/null @@ -1,503 +0,0 @@ -{ - "title": "Expect column distinct values to be in set", - "description": "Expect the set of distinct column values to be contained by a given set.\n\nExpectColumnDistinctValuesToBeInSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n The success value for this expectation will match that of [ExpectColumnValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_be_in_set).\n\nSee Also:\n [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set)\n [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToBeInSet(\n column=\"test\",\n value_set=[1, 2, 3, 4, 5]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToBeInSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - 
"items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "value_set": { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "anyOf": [ - { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "oneOf": [ - { - "title": "Text", - "type": "array", - "items": { - "type": 
"string", - "minLength": 1 - }, - "minItems": 1, - "examples": [ - [ - "a", - "b", - "c", - "d", - "e" - ], - [ - "2024-01-01", - "2024-01-02", - "2024-01-03", - "2024-01-04", - "2024-01-05" - ] - ] - }, - { - "title": "Numbers", - "type": "array", - "items": { - "type": "number" - }, - "minItems": 1, - "examples": [ - [ - 1, - 2, - 3, - 4, - 5 - ], - [ - 1.1, - 2.2, - 3.3, - 4.4, - 5.5 - ], - [ - 1, - 2.2, - 3, - 4.4, - 5 - ] - ] - } - ] - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnDistinctValuesToBeInSet" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_distinct_values_to_be_in_set" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Uniqueness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the set of distinct column values to be contained by a given set." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "value_set" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": 
"string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - 
"title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json deleted file mode 100644 index f9602e8bf38b..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json +++ /dev/null @@ -1,503 +0,0 @@ -{ - "title": "Expect column distinct values to contain set", - "description": "Expect the set of distinct column values to contain a given set.\n\nExpectColumnDistinctValuesToContainSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, 
or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set)\n [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToContainSet(\n column=\"test\",\n value_set=[1, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToContainSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - 
"items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "value_set": { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "anyOf": [ - { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "oneOf": [ - { - "title": "Text", - "type": "array", - "items": { - "type": 
"string", - "minLength": 1 - }, - "minItems": 1, - "examples": [ - [ - "a", - "b", - "c", - "d", - "e" - ], - [ - "2024-01-01", - "2024-01-02", - "2024-01-03", - "2024-01-04", - "2024-01-05" - ] - ] - }, - { - "title": "Numbers", - "type": "array", - "items": { - "type": "number" - }, - "minItems": 1, - "examples": [ - [ - 1, - 2, - 3, - 4, - 5 - ], - [ - 1.1, - 2.2, - 3.3, - 4.4, - 5.5 - ], - [ - 1, - 2.2, - 3, - 4.4, - 5 - ] - ] - } - ] - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnDistinctValuesToContainSet" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_distinct_values_to_contain_set" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Uniqueness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the set of distinct column values to contain a given set." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "value_set" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": 
"string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - 
"title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json deleted file mode 100644 index ae786ff01611..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json +++ /dev/null @@ -1,503 +0,0 @@ -{ - "title": "Expect column distinct values to equal set", - "description": "Expect the set of distinct column values to equal a given set.\n\nExpectColumnDistinctValuesToEqualSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set)\n [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToEqualSet(\n column=\"test\",\n value_set=[1, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToEqualSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - 
"items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "value_set": { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "anyOf": [ - { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "oneOf": [ - { - "title": "Text", - "type": "array", - "items": { - "type": 
"string", - "minLength": 1 - }, - "minItems": 1, - "examples": [ - [ - "a", - "b", - "c", - "d", - "e" - ], - [ - "2024-01-01", - "2024-01-02", - "2024-01-03", - "2024-01-04", - "2024-01-05" - ] - ] - }, - { - "title": "Numbers", - "type": "array", - "items": { - "type": "number" - }, - "minItems": 1, - "examples": [ - [ - 1, - 2, - 3, - 4, - 5 - ], - [ - 1.1, - 2.2, - 3.3, - 4.4, - 5.5 - ], - [ - 1, - 2.2, - 3, - 4.4, - 5 - ] - ] - } - ] - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnDistinctValuesToEqualSet" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_distinct_values_to_equal_set" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Uniqueness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the set of distinct column values to equal a given set." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "value_set" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": 
"string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - 
"title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json b/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json deleted file mode 100644 index 86a5dca97a17..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json +++ /dev/null @@ -1,531 +0,0 @@ -{ - "title": "Expect column KL divergence to be less than", - "description": "Expect the Kulback-Leibler (KL) divergence (relative entropy) of the specified column with respect to the partition object to be lower than the provided threshold.\n\nKL divergence compares two distributions. The higher the divergence value (relative entropy), the larger the difference between the two distributions. A relative entropy of zero indicates that the data are distributed identically, when binned according to the provided partition.\n\nIn many practical contexts, choosing a value between 0.5 and 1 will provide a useful test.\n\nThis expectation works on both categorical and continuous partitions. 
See notes below for details.\n\nExpectColumnKlDivergenceToBeLessThan is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n partition_object (dict or None): The expected partition object. See [partition_object](https://docs.greatexpectations.io/docs/reference/expectations/distributional_expectations/#partition-objects).\n threshold (float or None): The maximum KL divergence to for which to return success=True. If KL divergence is largerthan the provided threshold, the test will return success=False.\n internal_weight_holdout (float between 0 and 1 or None): The amount of weight to split uniformly among zero-weighted partition bins. internal_weight_holdout provides a mechanisms to make the test less strict by assigning positive weights to values observed in the data for which the partition explicitly expected zero weight. With no internal_weight_holdout, any value observed in\n such a region will cause KL divergence to rise to +Infinity. Defaults to 0.\n tail_weight_holdout (float between 0 and 1 or None): The amount of weight to add to the tails of the histogram.Tail weight holdout is split evenly between (-Infinity, min(partition_object['bins'])) and (max(partition_object['bins']), +Infinity). tail_weight_holdout provides a mechanism to make the test less strict by assigning positive weights to values observed in the data that are not present in the partition. With no tail_weight_holdout, any value observed outside the provided partition_object will cause KL divergence to rise to +Infinity. Defaults to 0.\n bucketize_data (boolean): If True, then continuous data will be bucketized before evaluation. 
Setting this parameter to false allows evaluation of KL divergence with a None partition object for profiling against discrete data.\n min_value (float or None): The minimum value for the column.\n max_value (float or None): The maximum value for the column.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * observed_value field in the result object is customized for this expectation to be a float representing the true KL divergence (relative entropy) or None if the value is calculated as infinity, -infinity, or NaN\n * details.observed_partition in the result object is customized for this expectation to be a dict representing the partition observed in the data\n * details.expected_partition in the result object is customized for this expectation to be a dict representing the partition against which the data were compared, after applying specified weight holdouts\n\n If the partition_object is categorical, this expectation will expect the values in column to also be categorical.\n\n * If the column includes values that are not present in the partition, the tail_weight_holdout will be equally split among those values, providing a mechanism to weaken the strictness of the expectation (otherwise, relative entropy would immediately go to infinity).\n * If the partition includes values that are not present in the column, the test will simply include zero weight for that value.\n\n If the partition_object is continuous, this expectation will discretize the values in the column according to the bins specified in the partition_object, and apply the test to the resulting distribution.\n\n * The internal_weight_holdout and tail_weight_holdout parameters provide a mechanism to weaken the expectation, since an expected weight of zero would drive relative entropy to be infinite if any data are observed in that interval.\n * If internal_weight_holdout is specified, that value will be distributed equally among any 
intervals with weight zero in the partition_object.\n * If tail_weight_holdout is specified, that value will be appended to the tails of the bins ((-Infinity, min(bins)) and (max(bins), Infinity).\n\n If relative entropy/kl divergence goes to infinity for any of the reasons mentioned above, the observed value will be set to None. This is because inf, -inf, Nan, are not json serializable and cause some json parsers to crash when encountered. The python None token will be serialized to null in json.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test\n 0 \"A\"\n 1 \"A\"\n 2 \"A\"\n 3 \"A\"\n 4 \"A\"\n 5 \"B\"\n 6 \"B\"\n 7 \"B\"\n 8 \"C\"\n 9 \"C\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnKLDivergenceToBeLessThan(\n column=\"test\",\n partition_object={\"weights\": [0.5, 0.3, 0.2], \"values\": [\"A\", \"B\", \"C\"]},\n threshold=0.1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": 
false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.0,\n \"details\": {\n \"observed_partition\": {\n \"values\": [\n \"A\",\n \"B\",\n \"C\"\n ],\n \"weights\": [\n 0.5,\n 0.3,\n 0.2\n ]\n },\n \"expected_partition\": {\n \"values\": [\n \"A\",\n \"B\",\n \"C\"\n ],\n \"weights\": [\n 0.5,\n 0.3,\n 0.2\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnKLDivergenceToBeLessThan(\n column=\"test\",\n partition_object={\"weights\": [0.3333333333333333, 0.3333333333333333, 0.3333333333333333], \"values\": [\"A\", \"B\", \"C\"]},\n threshold=0.01\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.06895927460353621,\n \"details\": {\n \"observed_partition\": {\n \"values\": [\n \"A\",\n \"B\",\n \"C\"\n ],\n \"weights\": [\n 0.5,\n 0.3,\n 0.2\n ]\n },\n \"expected_partition\": {\n \"values\": [\n \"A\",\n \"B\",\n \"C\"\n ],\n \"weights\": [\n 0.3333333333333333,\n 0.3333333333333333,\n 0.3333333333333333\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": 
"object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "partition_object": { - "title": "Partition Object", - "description": "The expected partition object.", - "type": "object" - }, - "threshold": { - "title": "Threshold", - "description": "The maximum KL divergence to for which to return success=True. If KL divergence is largerthan the provided threshold, the test will return success=False.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "internal_weight_holdout": { - "title": "Internal Weight Holdout", - "description": "The amount of weight to split uniformly among zero-weighted partition bins. 
internal_weight_holdout provides a mechanisms to make the test less strict by assigning positive weights to values observed in the data for which the partition explicitly expected zero weight.", - "default": 0, - "anyOf": [ - { - "type": "number", - "minimum": 0, - "maximum": 1 - }, - { - "type": "object" - } - ] - }, - "tail_weight_holdout": { - "title": "Tail Weight Holdout", - "description": "The amount of weight to add to the tails of the histogram.Tail weight holdout is split evenly between (-Infinity, min(partition_object['bins'])) and (max(partition_object['bins']), +Infinity). tail_weight_holdout provides a mechanism to make the test less strict by assigning positive weights to values observed in the data that are not present in the partition. With no tail_weight_holdout, any value observed outside the provided partition_object will cause KL divergence to rise to +Infinity. Defaults to 0.", - "default": 0, - "anyOf": [ - { - "type": "number", - "minimum": 0, - "maximum": 1 - }, - { - "type": "object" - } - ] - }, - "bucketize_data": { - "title": "Bucketize Data", - "description": "If True, then continuous data will be bucketized before evaluation. 
Setting this parameter to false allows evaluation of KL divergence with a None partition object for profiling against discrete data.", - "default": true, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "min_value": { - "title": "Min Value", - "description": "The minimum value for the column.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum value for the column.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnKLDivergenceToBeLessThan" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_kl_divergence_to_be_less_than" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation", - "distributional expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the Kulback-Leibler (KL) divergence (relative entropy) of the specified column with respect to the partition object to be lower than the provided threshold." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - 
"name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", 
- "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json deleted file mode 100644 index 69969f6badeb..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json +++ /dev/null @@ -1,497 +0,0 @@ -{ - "title": "Expect column maximum to be between", - "description": "Expect the column maximum to be between a minimum value and a maximum value.\n\nExpectColumnMaxToBeBetween is a Column Aggregate Expectation\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimum value of the acceptable range for the column maximum.\n max_value (comparable type or None): The maximum value of the acceptable range for the column maximum.\n strict_min (boolean): If True, the lower 
bound of the column maximum acceptable rangemust be strictly larger than min_value, default=False\n strict_max (boolean): If True, the upper bound of the column maximum acceptable rangemust be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column max\n\nSee Also:\n [ExpectColumnMinToBeBetween](https://greatexpectations.io/expectations/expect_column_min_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks 
(SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMaxToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMaxToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=7,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 7.0\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum value of the acceptable range for the column maximum.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum value of the acceptable range for the column maximum.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the lower bound of the column maximum acceptable rangemust be strictly larger than min_value, default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict 
Max", - "description": "If True, the upper bound of the column maximum acceptable rangemust be strictly smaller than max_value, default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnMaxToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_max_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column maximum to be between a minimum value and a maximum value." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json deleted file mode 100644 index 52a304f90ba5..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json +++ /dev/null @@ -1,497 +0,0 @@ -{ - "title": "Expect column mean to be between", - "description": "Expect the column mean to be between a minimum value and a maximum value (inclusive).\n\nExpectColumnMeanToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum value for the column mean.\n max_value (float or None): The maximum value for the column mean.\n strict_min (boolean): If True, the column mean must be strictly larger than 
min_value, default=False\n strict_max (boolean): If True, the column mean must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound.\n * If max_value is None, then min_value is treated as a lower bound.\n * observed_value field in the result object is customized for this expectation to be a float representing the true mean for the column\n\nSee Also:\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMeanToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1.275\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMeanToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3.375\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum value for the column mean.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum value for the column mean.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the column mean must be strictly larger than min_value, default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, the column mean must be strictly smaller than max_value, 
default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnMeanToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_mean_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column mean to be between a minimum value and a maximum value (inclusive)." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json deleted file mode 100644 index 5fa3a0fec133..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json +++ /dev/null @@ -1,497 +0,0 @@ -{ - "title": "Expect column median to be between", - "description": "Expect the column median to be between a minimum value and a maximum value.\n\nExpectColumnMedianToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum value for the column median.\n max_value (int or None): The maximum value for the column median.\n strict_min (boolean): If True, the column median must be strictly larger than 
min_value, default=False\n strict_max (boolean): If True, the column median must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the true median for the column\n\nSee Also:\n [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between)\n [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1.15\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test2\",\n min_value=3,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.75\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum value for the column median.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum value for the column median.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the column median must be strictly larger than min_value, default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, the column median must be strictly smaller than 
max_value, default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnMedianToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_median_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column median to be between a minimum value and a maximum value." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json deleted file mode 100644 index 82798b9ae7b6..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json +++ /dev/null @@ -1,497 +0,0 @@ -{ - "title": "Expect column minimum to be between", - "description": "Expect the column minimum to be between a minimum value and a maximum value.\n\nExpectColumnMinToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimal column minimum allowed.\n max_value (comparable type or None): The maximal column minimum allowed.\n strict_min (boolean): If True, the minimal column minimum must be 
strictly larger than min_value, default=False\n strict_max (boolean): If True, the maximal column minimum must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column min\n\nSee Also:\n [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks 
(SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMinToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .8\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=3,\n strict_min=True,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimal column minimum allowed.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximal column minimum allowed.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the minimal column minimum must be strictly larger than min_value, default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, the maximal column minimum must be strictly smaller 
than max_value, default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnMinToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_min_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column minimum to be between a minimum value and a maximum value." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json deleted file mode 100644 index 7011f29bdea9..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json +++ /dev/null @@ -1,515 +0,0 @@ -{ - "title": "Expect column most common value to be in set", - "description": "Expect the most common value to be within the designated value set.\n\nExpectColumnMostCommonValueToBeInSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A list of potential values to match.\n ties_okay (boolean or None): If True, then the expectation will still succeed if values outside the designated set are as 
common (but not more common) than designated values. Default False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * observed_value field in the result object is customized for this expectation to be a list representing the most common values in the column, which is often a single element... 
if there is a tie for most common among multiple values, observed_value will contain a single copy of each most common value\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMostCommonValueToBeInSet(\n column=\"test2\",\n value_set=[1, 2, 4],\n ties_okay=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMostCommonValueToBeInSet(\n column=\"test\",\n value_set=[1, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n 
},\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ]\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - 
"great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "value_set": { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "anyOf": [ - { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "oneOf": [ - { - "title": "Text", - "type": "array", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1, - "examples": [ - [ - "a", - "b", - "c", - "d", - "e" - ], - [ - "2024-01-01", - "2024-01-02", - "2024-01-03", - "2024-01-04", - "2024-01-05" - ] - ] - }, - { - "title": "Numbers", - "type": "array", - "items": { - "type": "number" - }, - "minItems": 1, - "examples": [ - [ - 1, - 2, - 3, - 4, - 5 - ], - [ - 1.1, - 2.2, - 3.3, - 4.4, - 5.5 - ], - [ - 1, - 2.2, - 3, - 4.4, - 5 - ] - ] - } - ] - }, - { - "type": "object" - } - ] - }, - "ties_okay": { - "title": "Ties Okay", - "description": "If True, then the expectation will still succeed if values outside the designated set are as common (but not more common) than designated values.", - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnMostCommonValueToBeInSet" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_most_common_value_to_be_in_set" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - 
"manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the most common value to be within the designated value set." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "value_set" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - 
}, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - 
] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json deleted file mode 100644 index e102e81efb25..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json +++ /dev/null @@ -1,483 +0,0 @@ -{ - "title": "Expect column pair values A to be greater than B", - "description": "Expect the values in column A to be greater than column B.\n\nExpectColumnPairValuesAToBeGreaterThanB is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n 
column_A (str): The first column name.\n column_B (str): The second column name.\n or_equal (boolean or None): If True, then values can be equal, not strictly greater.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 2 1\n 1 2 2\n 2 4 4\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesAToBeGreaterThanB(\n column_A=\"test\",\n column_B=\"test2\",\n or_equal=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n 
\"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesAToBeGreaterThanB(\n column_A=\"test2\",\n column_B=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ],\n [\n 2,\n 2\n ],\n [\n 4,\n 4\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column_A": { - "title": "Column A", - "description": "The first column name.", - "minLength": 1, - "type": "string" - }, - "column_B": { - "title": "Column B", - "description": "The second column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "or_equal": { - "title": "Or Equal", - "description": "If True, then values can be equal, not strictly greater.", - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "ignore_row_if": { - "title": "Ignore Row If", - "description": "If specified, sets the condition on which a given row is to be ignored.", - "default": "both_values_are_missing", - "anyOf": [ - { - "enum": [ - "both_values_are_missing", - "either_value_is_missing", - "neither" - ], - "type": "string" - }, - { - 
"type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnPairValuesAToBeGreaterThanB" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_pair_values_a_to_be_greater_than_b" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column_pair", - "description": "Column Pair Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column pair map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the values in column A to be greater than column B." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column_A", - "column_B" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - 
} - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json deleted file mode 100644 index 7c271c3a2c54..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json +++ /dev/null @@ -1,472 +0,0 @@ -{ - "title": "Expect column pair values to be equal", - "description": "Expect the values in column A to be the same as column B.\n\nExpectColumnPairValuesToBeEqual is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is 
to be ignored. Default \"both_values_are_missing\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 2\n 1 2 2\n 2 4 4\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesToBeEqual(\n column_A=\"test\",\n column_B=\"test2\",\n mostly=0.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n 
\"partial_unexpected_list\": [\n [\n 1,\n 2\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesToBeEqual(\n column_A=\"test\",\n column_B=\"test2\",\n mostly=1.0\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column_A": { - "title": "Column A", - "description": "The first column name.", - "minLength": 1, - "type": "string" - }, - "column_B": { - "title": "Column B", - "description": "The second column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "ignore_row_if": { - "title": "Ignore Row If", - "description": "If specified, sets the condition on which a given row is to be ignored.", - "default": "both_values_are_missing", - "anyOf": [ - { - "enum": [ - "both_values_are_missing", - "either_value_is_missing", - "neither" - ], - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": 
"ExpectColumnPairValuesToBeEqual" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_pair_values_to_be_equal" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column_pair", - "description": "Column Pair Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column pair map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the values in column A to be the same as column B." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column_A", - "column_B" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - 
"positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": 
"#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json deleted file mode 100644 index 
f152d27ee199..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json +++ /dev/null @@ -1,491 +0,0 @@ -{ - "title": "Expect column pair values to be in set", - "description": "Expect the paired values from columns A and B to belong to a set of valid pairs.\n\nExpectColumnPairValuesToBeInSet is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n value_pairs_set (list of tuples): All the valid pairs to be matched.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesToBeInSet(\n column_A=\"test\",\n column_B=\"test2\",\n value_pairs_set=[(2,1), (1,1)],\n mostly=.5\n 
)\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 4,\n 1\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesToBeInSet(\n column_A=\"test\",\n column_B=\"test2\",\n value_pairs_set=[(1,2) (4,1)],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n [\n 1,\n 1\n ],\n [\n 2,\n 1\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { 
- "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column_A": { - "title": "Column A", - "description": "The first column name.", - "minLength": 1, - "type": "string" - }, - "column_B": { - "title": "Column B", - "description": "The second column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "value_pairs_set": { - "title": "Value Pairs Set", - "anyOf": [ - { - "type": "array", - "items": { - "type": "array", - "minItems": 2, - "maxItems": 2, - "items": [ - {}, - {} - ] - } - }, - { - "type": "object" - } - ] - }, - "ignore_row_if": { - "title": "Ignore Row If", - "default": "both_values_are_missing", - "anyOf": [ - { - "enum": [ - "both_values_are_missing", - "either_value_is_missing", - "neither" - ], - "type": "string" - 
}, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnPairValuesToBeInSet" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_pair_values_to_be_in_set" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column_pair", - "description": "Column Pair Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column pair map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the paired values from columns A and B to belong to a set of valid pairs." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Redshift" - ] - } - } - } - }, - "required": [ - "column_A", - "column_B", - "value_pairs_set" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": 
"string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - 
"title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnProportionOfNonNullValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnProportionOfNonNullValuesToBeBetween.json deleted file mode 100644 index 4f963aa201ce..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnProportionOfNonNullValuesToBeBetween.json +++ /dev/null @@ -1,484 +0,0 @@ -{ - "title": "Expect column proportion of non-null values to be between", - "description": "Expect the proportion of non-null values to be between a minimum value and a maximum value.\n\nFor example, in a column containing [1, 2, None, 3, None, None, 4, 4, 4, 4], there are 7 non-null values and 10 total values for a proportion of 0.7.\n\nExpectColumnProportionOfNonNullValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation 
considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum proportion of non-null values (proportions are on the range 0 to 1).\n max_value (float or None): The maximum proportion of non-null values (proportions are on the range 0 to 1).\n strict_min (boolean): If True, the minimum proportion of non-null values must be strictly larger than min_value. default=False\n strict_max (boolean): If True, the maximum proportion of non-null values must be strictly smaller than max_value. default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the proportion of non-null values in the column\n\nSee Also:\n [ExpectColumnProportionOfUniqueValuesToBeBetween](https://greatexpectations.io/expectations/expect_column_proportion_of_unique_values_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks 
(SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Completeness\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 \"abb\" None\n 2 \"acc\" 1\n 3 None 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnProportionOfNonNullValuesToBeBetween(\n column=\"test\",\n min_value=0,\n max_value=0.8\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.75\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnProportionOfNonNullValuesToBeBetween(\n column=\"test2\",\n min_value=0.3,\n max_value=0.5,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.75\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum proportion of non-null values (proportions are on the range 0 to 1).", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum proportion of non-null values (proportions are on the range 0 to 1).", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the minimum proportion of non-null values must be strictly larger than min_value.", - "default": false, - "type": "boolean" - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, the maximum proportion of non-null values must be strictly smaller than max_value.", - "default": false, - "type": "boolean" - }, - "library_metadata": { - "title": "Library Metadata", - "default": { - "maturity": 
"production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - }, - "type": "object" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnProportionOfNonNullValuesToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_proportion_of_non_null_values_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Completeness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the proportion of non-null values to be between a minimum value and a maximum value." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json deleted file mode 100644 index bc0f82cee5e3..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json +++ /dev/null @@ -1,498 +0,0 @@ -{ - "title": "Expect column proportion of unique values to be between", - "description": "Expect the proportion of unique values to be between a minimum value and a maximum value.\n\nFor example, in a column containing [1, 2, 2, 3, 3, 3, 4, 4, 4, 4], there are 4 unique values and 10 total values for a proportion of 0.4.\n\nExpectColumnProportionOfUniqueValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data 
valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum proportion of unique values (Proportions are on the range 0 to 1).\n max_value (float or None): The maximum proportion of unique values (Proportions are on the range 0 to 1).\n strict_min (boolean): If True, the minimum proportion of unique values must be strictly larger than min_value. default=False\n strict_max (boolean): If True, the maximum proportion of unique values must be strictly smaller than max_value. default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the proportion of unique values in the column\n\nSee Also:\n [ExpectColumnUniqueValueCountToBeBetween](https://greatexpectations.io/expectations/expect_column_unique_value_count_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks 
(SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 \"abb\" 1\n 2 \"acc\" 1\n 3 \"aaa\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnProportionOfUniqueValuesToBeBetween(\n column=\"test\",\n min_value=0,\n max_value=0.8\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .75\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnProportionOfUniqueValuesToBeBetween(\n column=\"test2\",\n min_value=0.3,\n max_value=0.5,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .5\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum proportion of unique values (Proportions are on the range 0 to 1).", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum proportion of unique values (Proportions are on the range 0 to 1).", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the minimum proportion of unique values must be strictly larger than min_value.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, the maximum proportion of unique values must be strictly smaller than max_value.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } 
- ] - }, - "library_metadata": { - "title": "Library Metadata", - "default": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - }, - "type": "object" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnProportionOfUniqueValuesToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_proportion_of_unique_values_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Uniqueness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the proportion of unique values to be between a minimum value and a maximum value." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json deleted file mode 100644 index 50587c0c6814..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json +++ /dev/null @@ -1,493 +0,0 @@ -{ - "title": "Expect column quantile values to be between", - "description": "Expect the specific provided column quantiles to be between a minimum value and a maximum value.\n\nExpectColumnQuantileValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nExpectColumnQuantileValuesToBeBetween can be computationally intensive for large datasets.\n\nArgs:\n column (str): The column name.\n quantile_ranges (dictionary with keys 'quantiles' and 
'value_ranges'): Key 'quantiles' is an increasingly ordered list of desired quantile values (floats). Key 'value_ranges' is a list of 2-value lists that specify a lower and upper bound (inclusive) for the corresponding quantile (with [min, max] ordering). The length of the 'quantiles' list and the 'value_ranges' list must be equal.\n allow_relative_error (boolean or string): Whether to allow relative error in quantile communications on backends that support or require it.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound only\n * If max_value is None, then min_value is treated as a lower bound only\n * details.success_details field in the result object is customized for this expectation\n\nSee Also:\n [ExpectColumnMinToBeBetween](https://greatexpectations.io/expectations/expect_column_min_to_be_between)\n [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between)\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test\n 0 1 1\n 1 2 7\n 2 2 2.5\n 3 3 3\n 4 3 2\n 5 3 5\n 6 4 6\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnQuantileValuesToBeBetween(\n column=\"test\",\n quantile_ranges={\n \"quantiles\": [0, .333, .667, 1],\n \"value_ranges\": [[0,1], [2,3], [3,4], [4,5]]\n }\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": {\n \"quantiles\": [\n 0,\n 0.333,\n 0.6667,\n 1\n ],\n \"values\": [\n 1,\n 2,\n 3,\n 4\n ]\n },\n \"details\": {\n \"success_details\": [\n true,\n true,\n true,\n true\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnQuantileValuesToBeBetween(\n column=\"test2\",\n quantile_ranges={\n \"quantiles\": [0, .333, .667, 1],\n \"value_ranges\": [[0,1], [2,3], [3,4], [4,5]]\n }\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": {\n \"quantiles\": [\n 0,\n 0.333,\n 0.6667,\n 1\n ],\n \"values\": [\n 1.0,\n 2.5,\n 5.0,\n 7.0\n ]\n },\n \"details\": {\n \"success_details\": [\n true,\n true,\n false,\n false\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - 
"type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "quantile_ranges": { - "title": "Quantile Ranges", - "description": "Key 'quantiles' is an increasingly ordered list of desired quantile values (floats). 
Key 'value_ranges' is a list of 2-value lists that specify a lower and upper bound (inclusive) for the corresponding quantile (with [min, max] ordering).", - "anyOf": [ - { - "$ref": "#/definitions/QuantileRange" - }, - { - "type": "object" - } - ] - }, - "allow_relative_error": { - "title": "Allow Relative Error", - "description": "Whether to allow relative error in quantile communications on backends that support or require it.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnQuantileValuesToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_quantile_values_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the specific provided column quantiles to be between a minimum value and a maximum value." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "quantile_ranges" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - }, - "QuantileRange": { - "title": "QuantileRange", - "type": "object", - "properties": { - "quantiles": { - "title": "Quantiles", - "type": "array", - "items": { - "type": "number" - } - }, - "value_ranges": { - "title": "Value Ranges", - "type": "array", - "items": { - "type": "array", - "items": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "integer" - } - ] - } - } - } - }, - "required": [ - "quantiles", - "value_ranges" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json deleted file mode 100644 index 85b84e711f2f..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json +++ /dev/null @@ -1,481 +0,0 @@ -{ - "title": "Expect column standard deviation to be between", - "description": "Expect the column standard deviation to be between a minimum value and a maximum value.\n\nUses sample standard deviation (normalized by N-1).\n\nExpectColumnStdevToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common 
types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum value for the column standard deviation.\n max_value (float or None): The maximum value for the column standard deviation.\n strict_min (boolean): If True, the column standard deviation must be strictly larger than min_value. default=False.\n strict_max (boolean): If True, the column standard deviation must be strictly smaller than max_value. default=False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the true standard deviation for the column\n\nSee Also:\n [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between)\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnStdevToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=.6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.5251983752196243\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnStdevToBeBetween(\n column=\"test2\",\n min_value=.5,\n max_value=.6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.5617376914898995\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum value for the column standard deviation.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum value for the column standard deviation.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the column standard deviation must be strictly larger than min_value.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, the column standard deviation must be strictly smaller than max_value.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - 
"expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnStdevToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_stdev_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column standard deviation to be between a minimum value and a maximum value." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, 
- "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - 
"enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json 
b/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json deleted file mode 100644 index 08f79476b9b6..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json +++ /dev/null @@ -1,497 +0,0 @@ -{ - "title": "Expect column sum to be between", - "description": "Expect the column sum to be between a minimum value and a maximum value.\n\nExpectColumnSumToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimal sum allowed.\n max_value (comparable type or None): The maximal sum allowed.\n strict_min (boolean): If True, the minimal sum must be strictly larger than min_value. default=False.\n strict_max (boolean): If True, the maximal sum must be strictly smaller than max_value. default=False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. 
Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column sum\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnSumToBeBetween(\n column=\"test\",\n min_value=2,\n max_value=6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 5.1\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnSumToBeBetween(\n column=\"test2\",\n min_value=2,\n max_value=6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 13.5\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimal sum allowed.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximal sum allowed.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the minimal sum must be strictly larger than min_value.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, the maximal sum must be strictly smaller than max_value.", - "default": false, - "anyOf": [ - { - 
"type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnSumToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_sum_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column sum to be between a minimum value and a maximum value." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnToExist.json b/great_expectations/expectations/core/schemas/ExpectColumnToExist.json deleted file mode 100644 index d156433aedf6..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnToExist.json +++ /dev/null @@ -1,248 +0,0 @@ -{ - "title": "Expect column to exist", - "description": "Checks for the existence of a specified column within a table.\n\nExpectColumnToExist is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation. They are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n column_index (int or None, optional): If not None, checks the order of the columns. The expectation will fail if the column is not in location column_index (zero-indexed).\n result_format (str or None, optional): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None, optional): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None, optional): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nPassing Case:\n Input:\n ExpectColumnToExist(\n column=\"test\",\n column_index=0\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": true,\n \"result\": {}\n }\n\nFailing Case:\n Input:\n ExpectColumnToExist(\n column=\"missing_column\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": false,\n \"result\": {}\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "column_index": { - "title": "Column Index", - "description": "If not None, checks the order of the columns. The expectation will fail if the column is not in location column_index (zero-indexed).", - "anyOf": [ - { - "type": "integer" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnToExist" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_to_exist" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Schema" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "table expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Checks for the existence of a specified column within a table." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json deleted 
file mode 100644 index 0a718a9c1ada..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json +++ /dev/null @@ -1,498 +0,0 @@ -{ - "title": "Expect column unique value count to be between", - "description": "Expect the number of unique values to be between a minimum value and a maximum value.\n\nExpectColumnUniqueValueCountToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum number of unique values allowed.\n max_value (int or None): The maximum number of unique values allowed.\n strict_min (bool): If True, the column must have strictly more unique value count than min_value to pass.\n strict_max (bool): If True, the column must have strictly fewer unique value count than max_value to pass.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. 
Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be an int representing the number of unique values the column\n\nSee Also:\n [ExpectColumnProportionOfUniqueValuesToBeBetween](https://greatexpectations.io/expectations/expect_column_proportion_of_unique_values_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 \"abb\" 1\n 2 \"acc\" 1\n 3 \"aaa\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnUniqueValueCountToBeBetween(\n column=\"test\",\n min_value=2,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnUniqueValueCountToBeBetween(\n column=\"test2\",\n min_value=3,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum number of unique values allowed.", - "anyOf": [ - { - "type": "integer" - }, - { - "type": "object" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum number of unique values allowed.", - "anyOf": [ - { - "type": "integer" - }, - { - "type": "object" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the column must have strictly more unique value count than min_value to pass.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, the column must have strictly fewer unique value count than max_value to pass.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "library_metadata": { - "title": "Library Metadata", - 
"default": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - }, - "type": "object" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnUniqueValueCountToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_unique_value_count_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Aggregate" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Uniqueness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column aggregate expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the number of unique values to be between a minimum value and a maximum value." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json deleted file mode 100644 index 3499a1d3ed50..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json +++ /dev/null @@ -1,497 +0,0 @@ -{ - "title": "Expect column value lengths to be between", - "description": "Expect the column entries to be strings with length between a minimum value and a maximum value (inclusive).\n\nThis expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError.\n\nExpectColumnValueLengthsToBeBetween is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum value for a column entry length.\n max_value (int or None): The maximum value for a column entry length.\n strict_min (boolean): If True, values must be strictly larger than min_value. Default=False\n strict_max (boolean): If True, values must be strictly smaller than max_value. Default=False\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectColumnValueLengthsToEqual](https://greatexpectations.io/expectations/expect_column_value_lengths_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test 
test2\n 0 \"12345\" \"A\"\n 1 \"abcde\" \"13579\"\n 2 \"1b3d5\" \"24680\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueLengthsToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueLengthsToBeBetween(\n column=\"test\",\n min_value=5,\n max_value=5,\n strict_min=True,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - 
"rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum value for a column entry length.", - "anyOf": [ - { - "type": "integer" - }, - { - "type": "object" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum value for a column entry length.", - "anyOf": [ - { - "type": "integer" - }, - { - "type": "object" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, values must 
be strictly larger than min_value.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, values must be strictly smaller than max_value.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValueLengthsToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_value_lengths_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings with length between a minimum value and a maximum value (inclusive)." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json deleted file mode 100644 index caa81ad27e0e..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json +++ /dev/null @@ -1,460 +0,0 @@ -{ - "title": "Expect column value lengths to equal", - "description": "Expect the column entries to be strings with length equal to the provided value.\n\nThis expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError.\n\nExpectColumnValueLengthsToEqual is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value (int): The expected value for a column entry length.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"12345\" \"A\"\n 1 \"abcde\" \"13579\"\n 2 \"1b3d5\" \"24680\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueLengthsToEqual(\n column=\"test\",\n value=5\n )\n\n Output:\n {\n 
\"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueLengthsToEqual(\n column=\"test2\",\n value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"A\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "value": { - "title": "Value", - "description": "The expected value for a column entry length.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValueLengthsToEqual" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_value_lengths_to_equal" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": 
"Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings with length equal to the provided value." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "value" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - 
"parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - 
"description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json b/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json deleted file mode 100644 index c6180fc27842..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json +++ /dev/null @@ -1,473 +0,0 @@ -{ - "title": "Expect column value z-scores to be less than", - "description": "Expect the Z-scores of a column's values to be less than a given 
threshold.\n\nExpectColumnValueZScoresToBeLessThan is a Column Map Expectation for typed-column backends, and also for PandasExecutionEngine where the column dtype and provided type_ are unambiguous constraints (any dtype except 'object' or dtype of 'object' with type_ specified as 'object').\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n threshold (number): A maximum Z-score threshold. All column Z-scores that are lower than this threshold will evaluate successfully.\n double_sided (boolean): A True or False value indicating whether to evaluate double sidedly. Examples: (double_sided = True, threshold = 2) -> Z scores in non-inclusive interval(-2,2) | (double_sided = False, threshold = 2) -> Z scores in non-inclusive interval (-infinity,2)\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 -100000000000\n 1 1 -1\n 2 1 0\n 3 3 1\n 4 3 1\n\nCode Examples:\n Passing Case:\n Input:\n 
ExpectColumnValueZScoresToBeLessThan(\n column=\"test\",\n threshold=1.96,\n double_sided=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 5,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueZScoresToBeLessThan(\n column=\"test2\",\n threshold=1,\n double_sided=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 5,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 20.0,\n \"partial_unexpected_list\": [\n -100000000000\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 20.0,\n \"unexpected_percent_nonmissing\": 20.0\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact 
of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "threshold": { - "title": "Threshold", - "description": "A maximum Z-score threshold. All column Z-scores that are lower than this threshold will evaluate successfully.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "double_sided": { - "title": "Double Sided", - "description": "A True or False value indicating whether to evaluate double sidedly. 
Examples: (double_sided = True, threshold = 2) -> Z scores in non-inclusive interval(-2,2) | (double_sided = False, threshold = 2) -> Z scores in non-inclusive interval (-infinity,2)", - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValueZScoresToBeLessThan" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_value_z_scores_to_be_less_than" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the Z-scores of a column's values to be less than a given threshold." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "threshold", - "double_sided" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": 
"Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - 
"properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json deleted file mode 100644 index 015525fbb160..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json +++ /dev/null @@ -1,513 +0,0 @@ -{ - "title": "Expect column values to be between", - "description": "Expect the column entries to be between a minimum value and a maximum value (inclusive).\n\nExpectColumnValuesToBeBetween is a Column Map Expectation\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimum value for a column entry.\n max_value (comparable type or None): The maximum value for a column entry.\n strict_min (boolean): If True, values must be strictly larger than min_value. Default=False.\n strict_max (boolean): If True, values must be strictly smaller than max_value. Default=False.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound, and there is no minimum value checked.\n * If max_value is None, then min_value is treated as a lower bound, and there is no maximum value checked.\n\nSee Also:\n [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n 
Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 4,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=7,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 4,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 25.0,\n \"partial_unexpected_list\": [\n 7.0\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 25.0,\n \"unexpected_percent_nonmissing\": 25.0\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered 
Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum value for a column entry.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum value for a column entry.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, 
- { - "type": "string", - "format": "date-time" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, values must be strictly larger than min_value.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "The maximum value for a column entry.", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be between a minimum value and a maximum value (inclusive)." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json deleted file mode 100644 index caca615d9217..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json +++ /dev/null @@ -1,519 +0,0 @@ -{ - "title": "Expect column values to be in set", - "description": "Expect each column value to be in a given set.\n\nExpectColumnValuesToBeInSet is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToNotBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_not_be_in_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeInSet(\n column=\"test\",\n value_set=[1, 2],\n mostly=.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n 
\"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n 4\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeInSet(\n column=\"test2\",\n value_set=[2, 4],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n 1,\n 1,\n 1\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "value_set": { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "anyOf": [ - { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "oneOf": [ - { - "title": "Text", - "type": "array", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1, - "examples": [ - [ - "a", - "b", - "c", - "d", - "e" - ], - [ - "2024-01-01", - "2024-01-02", - "2024-01-03", - "2024-01-04", - "2024-01-05" - ] - ] - }, - { - "title": "Numbers", - "type": "array", - "items": { - "type": "number" - }, - "minItems": 1, - "examples": [ - [ - 1, - 2, - 3, - 4, - 5 - ], - [ - 1.1, - 2.2, 
- 3.3, - 4.4, - 5.5 - ], - [ - 1, - 2.2, - 3, - 4.4, - 5 - ] - ] - } - ] - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToBeInSet" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_be_in_set" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect each column value to be in a given set." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "value_set" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": 
"string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - 
"title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json deleted file mode 100644 index 274ef4729965..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json +++ /dev/null @@ -1,458 +0,0 @@ -{ - "title": "Expect column values to be in type list", - "description": "Expect a column to contain values from a specified type list.\n\nExpectColumnValuesToBeInTypeList is a Column Map Expectation for typed-column backends, and also for Pandas Datasources where the column dtype provides an unambiguous constraints (any dtype except 'object').\n\nFor Pandas columns with dtype of 'object' ExpectColumnValuesToBeInTypeList will independently check each row's type.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n type_list (list[str] or None): \nA list of strings representing the data type that each column should have as entries. Valid types are defined by the current backend implementation and are dynamically loaded.\n\n For example, valid types for Pandas Datasources include any numpy dtype values (such as 'int64') or native python types (such as 'int'), whereas valid types for a SqlAlchemy Datasource include types named by the current driver such as 'INTEGER' in most SQL dialects and 'TEXT' in dialects such as postgresql. Valid types for Spark Datasources include 'StringType', 'BooleanType' and other pyspark-defined type names.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee also:\n [ExpectColumnValuesToBeOfType](https://greatexpectations.io/expectations/expect_column_values_to_be_of_type)\n\nSupported Data Sources:\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 \"12345\" 1\n 1 \"abcde\" 2\n 2 \"1b3d5\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeInTypeList(\n column=\"test2\",\n type_list=[\"NUMBER\", \"STRING\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing 
Case:\n Input:\n ExpectColumnValuesToBeInTypeList(\n column=\"test\",\n type_list=[\"NUMBER\", \"DOUBLE\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "type_list": { - "title": "Type List", - "description": "\n A list of strings representing the data type that each column should have as entries. 
Valid types are defined by the current backend implementation and are dynamically loaded.\n ", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string" - } - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToBeInTypeList" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_be_in_type_list" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Schema" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect a column to contain values from a specified type list." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "SQL Server", - "BigQuery", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - 
"title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": 
"string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json deleted file mode 100644 index 59de3fdfbe60..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json +++ /dev/null @@ -1,447 +0,0 @@ -{ - "title": "Expect column values to be null", - "description": "Expect the column values to be null.\n\nExpectColumnValuesToBeNull is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToNotBeNull](https://greatexpectations.io/expectations/expect_column_values_to_not_be_null)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Completeness\n\nExample Data:\n test test2\n 0 NaN \"A\"\n 1 True NaN\n 2 False NaN\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeNull(\n column=\"test2\",\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"A\"\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeNull(\n column=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n true,\n false\n ]\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": 
"Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToBeNull" - }, - "expectation_type": { - "title": "Expectation Type", - "type": 
"string", - "const": "expect_column_values_to_be_null" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Completeness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column values to be null." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across 
<`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": 
"Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json deleted file mode 100644 index 3a0088c88ee1..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json +++ /dev/null @@ -1,460 +0,0 @@ -{ - "title": "Expect column values to be of 
type", - "description": "Expect a column to contain values of a specified data type.\n\nExpectColumnValuesToBeOfType is a Column Map Expectation for typed-column backends, and also for Pandas Datasources where the column dtype and provided type_ are unambiguous constraints (any dtype except 'object' or dtype of 'object' with type_ specified as 'object').\n\nFor Pandas columns with dtype of 'object' ExpectColumnValuesToBeOfType will\nindependently check each row's type.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n type\\_ (str): \nA string representing the data type that each column should have as entries. Valid types are defined by the current backend implementation and are dynamically loaded.\n\n For example, valid types for Pandas Datasources include any numpy dtype values (such as 'int64') or native python types (such as 'int'), whereas valid types for a SqlAlchemy Datasource include types named by the current driver such as 'INTEGER' in most SQL dialects and 'TEXT' in dialects such as postgresql. Valid types for Spark Datasources include 'StringType', 'BooleanType' and other pyspark-defined type names. Note that the strings representing these types are sometimes case-sensitive. For instance, with a Pandas backend `timestamp` will be unrecognized and fail the expectation, while `Timestamp` would pass with valid data.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee also:\n [ExpectColumnValuesToBeInTypeList](https://greatexpectations.io/expectations/expect_column_values_to_be_in_type_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 \"12345\" 1\n 1 \"abcde\" 2\n 2 \"1b3d5\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeOfType(\n column=\"test2\",\n type_=\"NUMBER\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeOfType(\n column=\"test\",\n type_=\"DOUBLE\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - 
"type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": 
[ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "type_": { - "title": "Type ", - "description": "\n A string representing the data type that each column should have as entries. Valid types are defined by the current backend implementation and are dynamically loaded.\n ", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToBeOfType" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_be_of_type" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Schema" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect a column to contain values of a specified data type." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "type_" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": 
"string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - 
"title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json deleted file mode 100644 index 79eef36bd3d7..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json +++ /dev/null @@ -1,447 +0,0 @@ -{ - "title": "Expect column values to be unique", - "description": "Expect each column value to be unique.\n\nThis expectation detects duplicates. All duplicated values are counted as exceptions.\n\nFor example, [1, 2, 3, 3, 3] will return [3, 3, 3] in result.exceptions_list, with unexpected_percent = 60.0.\n\nExpectColumnValuesToBeUnique is a Column Map Expectation\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 \"A\"\n 1 2 \"A\"\n 2 3 \"B\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeUnique(\n column=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n 
\"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeUnique(\n column=\"test2\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"A\",\n \"A\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": true\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToBeUnique" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_be_unique" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Uniqueness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": 
"production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect each column value to be unique." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - 
"type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - 
"conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json deleted file mode 100644 index 64c8999ae0c6..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json +++ /dev/null @@ -1,458 +0,0 @@ -{ - "title": "Expect column values to match like pattern", - "description": "Expect the column entries to be strings that match a given like pattern expression.\n\nExpectColumnValuesToMatchLikePattern is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on 
the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern (str): The SQL like pattern expression the column entries should match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL 
Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"bee\"\n 2 \"acc\" \"24601\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchLikePattern(\n column=\"test\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchLikePattern(\n column=\"test2\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"bee\",\n \"24601\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": 
"#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "like_pattern": { - "title": "Like Pattern", - "description": "The SQL like pattern expression the column entries should match.", - "anyOf": [ - { 
- "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToMatchLikePattern" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_match_like_pattern" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings that match a given like pattern expression." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "Redshift", - "MySQL", - "SQL Server", - "Databricks (SQL)", - "BigQuery", - "Snowflake" - ] - } - } - } - }, - "required": [ - "column", - "like_pattern" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - 
"required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - 
"default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json deleted file mode 100644 index 0b9973fc30fe..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json +++ /dev/null @@ -1,478 +0,0 @@ -{ - "title": "Expect column values to match like pattern list", - "description": "Expect the column entries to be strings that match any of a provided list of like pattern expressions.\n\nExpectColumnValuesToMatchLikePatternList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern_list (List[str]): The list of SQL like pattern expressions the column entries should match.\n match_on (string): 'any' or 'all'. Use 'any' if the value should match at least one like pattern in the list. Use 'all' if it should match each like pattern in the list.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL 
Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"adb\"\n 2 \"acc\" \"aaa\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchLikePatternList(\n column=\"test\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n match_on=\"any\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchLikePatternList(\n column=\"test2\",\n like_pattern_list=[\"[ad]%\", \"[a]%\"],\n match_on=\"all\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"aaa\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": 
"array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": 
"string" - }, - "like_pattern_list": { - "title": "Like Pattern List", - "description": "The list of SQL like pattern expressions the column entries should match.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string" - } - }, - { - "type": "object" - } - ] - }, - "match_on": { - "title": "Match On", - "description": "'any' or 'all'. Use 'any' if the value should match at least one like pattern in the list. Use 'all' if it should match each like pattern in the list.", - "default": "any", - "anyOf": [ - { - "enum": [ - "any", - "all" - ], - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToMatchLikePatternList" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_match_like_pattern_list" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings that match any of a provided list of like pattern expressions." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "Redshift", - "MySQL", - "SQL Server", - "Databricks (SQL)", - "BigQuery", - "Snowflake" - ] - } - } - } - }, - "required": [ - "column", - "like_pattern_list" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - 
}, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json deleted file mode 100644 index fdfaf5758818..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json +++ /dev/null @@ -1,458 +0,0 @@ -{ - "title": "Expect column values to match regex", - "description": "Expect the column entries to be strings that match a given regular expression.\n\nValid matches can be found anywhere in the string, for example \"[at]+\" will identify the following strings as expected: \"cat\", \"hat\", \"aa\", \"a\", and \"t\", and the following strings as unexpected: \"fish\", \"dog\".\n\nExpectColumnValuesToMatchRegex is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex (str): The regular expression the column entries should match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchRegex(\n column=\"test\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchRegex(\n column=\"test2\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"bcc\",\n \"bdd\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - 
"default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "regex": { - "title": "Regex", - "description": "The regular expression the column entries should 
match.", - "default": "(?s).*", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToMatchRegex" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_match_regex" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings that match a given regular expression." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "BigQuery", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] 
- }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ 
- "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json deleted file mode 100644 index 5a23d4e31b15..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json +++ /dev/null @@ -1,478 +0,0 @@ -{ - "title": "Expect column values to match regex list", - "description": "Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions.\n\nMatches can be anywhere in the string.\n\nExpectColumnValuesToMatchRegexList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex_list (list): The list of regular expressions which the column entries should match.\n match_on (string): 'any' or 'all'. 
Use 'any' if the value should match at least one regular expression in the list. Use 'all' if it should match each regular expression in the list.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchRegexList(\n column=\"test2\",\n regex_list=[\"^a.*\", \"^b.*\"],\n match_on=\"any\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchRegexList(\n column=\"test\",\n regex_list=[\"^a.*\", \"^b.*\"],\n match_on=\"all\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"bcc\",\n \"bdd\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - 
"result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "regex_list": { - "title": "Regex List", - 
"description": "The list of regular expressions which the column entries should match.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string" - } - }, - { - "type": "object" - } - ] - }, - "match_on": { - "title": "Match On", - "description": "'any' or 'all'. Use 'any' if the value should match at least one regular expression in the list. Use 'all' if it should match each regular expression in the list.", - "default": "any", - "anyOf": [ - { - "enum": [ - "any", - "all" - ], - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToMatchRegexList" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_match_regex_list" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "BigQuery", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "regex_list" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - 
"required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - 
"default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json deleted file mode 100644 index c018c345a1fe..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json +++ /dev/null @@ -1,519 +0,0 @@ -{ - "title": "Expect column values to not be in set", - "description": "Expect column entries to not be in the set.\n\nExpectColumnValuesToNotBeInSet is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_be_in_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotBeInSet(\n column=\"test2\",\n value_set=[2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": 
false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotBeInSet(\n column=\"test\",\n value_set=[2, 4],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n 2,\n 4\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "value_set": { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "anyOf": [ - { - "title": "Value Set", - "description": "A set of objects used for comparison.", - "oneOf": [ - { - "title": "Text", - "type": "array", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1, - "examples": [ - [ - "a", - "b", - "c", - "d", - "e" - ], - [ - "2024-01-01", - "2024-01-02", - "2024-01-03", - "2024-01-04", - "2024-01-05" - ] - ] - }, - { - "title": "Numbers", - "type": "array", - "items": { - "type": "number" - }, - "minItems": 1, - "examples": [ - [ - 1, - 2, - 3, - 4, - 5 - ], - [ - 1.1, - 2.2, 
- 3.3, - 4.4, - 5.5 - ], - [ - 1, - 2.2, - 3, - 4.4, - 5 - ] - ] - } - ] - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToNotBeInSet" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_not_be_in_set" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect column entries to not be in the set." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "value_set" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": 
"string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - 
"title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json deleted file mode 100644 index 583a1617bac6..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json +++ /dev/null @@ -1,447 +0,0 @@ -{ - "title": "Expect column values to not be null", - "description": "Expect the column values to not be null.\n\nTo be counted as an exception, values must be explicitly null or missing, such as a NULL in PostgreSQL or an\nnp.NaN in pandas. Empty strings don't count as null unless they have been coerced to a null type.\n\nExpectColumnValuesToNotBeNull is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToBeNull](https://greatexpectations.io/expectations/expect_column_values_to_be_null)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Completeness\n\nExample Data:\n test test2\n 0 NaN \"A\"\n 1 True NaN\n 2 False NaN\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotBeNull(\n column=\"test\",\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n 
\"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n null\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotBeNull(\n column=\"test2\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n null,\n null\n ]\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToNotBeNull" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_not_be_null" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Completeness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - 
"maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column values to not be null." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - 
"default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" 
- }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json deleted file mode 100644 index 8dc43d0ff0fe..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json +++ /dev/null @@ -1,457 +0,0 @@ -{ - "title": "Expect column values to not match like pattern", - "description": "Expect the column entries to be strings that do NOT match a given like pattern expression.\n\nExpectColumnValuesToNotMatchLikePattern is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every 
row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern (str): The SQL like pattern expression the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL 
Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"bee\"\n 2 \"acc\" \"24601\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePattern(\n column=\"test2\",\n like_pattern=\"[a]%\",\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"ade\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePattern(\n column=\"test\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": 
"BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "like_pattern": { - "title": "Like Pattern", - "description": "The SQL like pattern expression the column 
entries should NOT match.", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToNotMatchLikePattern" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_not_match_like_pattern" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings that do NOT match a given like pattern expression." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "Redshift", - "MySQL", - "SQL Server", - "Databricks (SQL)", - "Snowflake" - ] - } - } - } - }, - "required": [ - "column", - "like_pattern" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ 
- "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": 
"or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json deleted file mode 100644 index 5d18e902d412..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json +++ /dev/null @@ -1,460 +0,0 @@ -{ - "title": "Expect column values to not match like pattern list", - "description": "Expect the column entries to be strings that do NOT match any of a provided list of like pattern expressions.\n\nExpectColumnValuesToNotMatchLikePatternList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern_list (List[str]): The list of SQL like pattern expressions the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL 
Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"adb\"\n 2 \"acc\" \"aaa\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePatternList(\n column=\"test2\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"aaa\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePatternList(\n column=\"test\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, 
- "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "like_pattern_list": { - "title": "Like Pattern 
List", - "description": "The list of SQL like pattern expressions the column entries should NOT match.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string" - } - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToNotMatchLikePatternList" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_not_match_like_pattern_list" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings that do NOT match any of a provided list of like pattern expressions." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "Redshift", - "MySQL", - "SQL Server", - "Databricks (SQL)", - "Snowflake" - ] - } - } - } - }, - "required": [ - "column", - "like_pattern_list" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - 
"required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - 
"default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json deleted file mode 100644 index 36e0460a1882..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json +++ /dev/null @@ -1,457 +0,0 @@ -{ - "title": "Expect column values to not match regex", - "description": "Expect the column entries to be strings that do NOT match a given regular expression.\n\nThe regex must not match any portion of the provided string. For example, \"[at]+\" would identify the following strings as expected: \"fish\", \"dog\", and the following as unexpected: \"cat\", \"hat\".\n\nExpectColumnValuesToNotMatchRegex is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex (str): The regular expression the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchRegex(\n column=\"test2\",\n regex=\"^a.*\",\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"abc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchRegex(\n column=\"test\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - 
"description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "regex": { - "title": "Regex", - "description": "The regular expression the column entries should NOT match.", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": 
"object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToNotMatchRegex" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_not_match_regex" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings that do NOT match a given regular expression." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column", - "regex" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - 
}, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - 
"or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json deleted file mode 100644 index 88fd01070bf9..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json +++ /dev/null @@ -1,460 +0,0 @@ -{ - "title": "Expect column values to not match regex list", - "description": "Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string.\n\nExpectColumnValuesToNotMatchRegexList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex_list (list): The list of regular expressions which the column entries should not match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchRegexList(\n column=\"test\",\n regex_list=[\"^b.*\", \"^c.*\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchRegexList(\n column=\"test2\",\n regex_list=[\"^b.*\", \"^c.*\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"abc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { 
- "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column": { - "title": "Column", - "description": "The column name.", - "minLength": 1, - "type": "string" - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "regex_list": { - "title": "Regex List", - "description": "The list of regular expressions which the column entries should not match.", - "anyOf": [ - { - "type": "array", - "items": { - "type": "string" - } - }, - { - "type": "object" 
- } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectColumnValuesToNotMatchRegexList" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_column_values_to_not_match_regex_list" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "column", - "description": "Column Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Validity" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "column map expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "Redshift", - "MySQL", - "Databricks (SQL)", - "SQLite" - ] - } - } - } - }, - "required": [ - "column", - "regex_list" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - 
] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": 
[ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json b/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json deleted file mode 100644 index fe662a781351..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json +++ /dev/null @@ -1,459 +0,0 @@ -{ - "title": "Expect compound columns to be unique", - "description": "Expect the compound columns to be unique.\n\nExpectCompoundColumnsToBeUnique is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): Set of columns to be checked.\n\nOther Parameters:\n ignore_row_if (str): \"all_values_are_missing\", \"any_value_is_missing\", \"never\" If specified, sets the condition on which a given row is to be ignored. 
Default \"never\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2 test3 test4\n 0 1 1 4 1\n 1 2 1 7 1\n 2 4 1 -3 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectCompoundColumnsToBeUnique(\n column_list=[\"test\", \"test2\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n 
\"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectCompoundColumnsToBeUnique(\n column_list=[\"test2\", \"test4\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n {\n \"test2\": 1,\n \"test4\": 1\n },\n {\n \"test2\": 1,\n \"test4\": 1\n },\n {\n \"test2\": 1,\n \"test4\": 1\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column_list": { - "title": "Column List", - "description": "Set of columns to be checked.", - "type": "array", - "items": { - "type": "string" - } - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "ignore_row_if": { - "title": "Ignore Row If", - "default": "all_values_are_missing", - "enum": [ - "all_values_are_missing", - "any_value_is_missing", - "never" - ], - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectCompoundColumnsToBeUnique" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_compound_columns_to_be_unique" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "multicolumn", - 
"description": "Multicolumn Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Uniqueness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "multi-column expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the compound columns to be unique." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column_list" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - 
"type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": 
"AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json b/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json deleted file mode 100644 index bf0a9c1dae29..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json +++ /dev/null @@ -1,480 +0,0 @@ -{ - "title": "Expect multicolumn sum to equal", - "description": "Expect that the sum of row values in a specified column list is the same for each row, and equal to a specified sum 
total.\n\nExpectMulticolumnSumToEqual is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): Set of columns to be checked.\n sum_total (int or float): Expected sum of columns\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2 test3\n 0 1 2 4\n 1 2 -2 7\n 2 4 4 -3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectMulticolumnSumToEqual(\n column_list=[\"test\", \"test2\", \"test3\"],\n sum_total=7,\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n 
\"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 4,\n \"test2\": 4,\n \"test3\": -3\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectMulticolumnSumToEqual(\n column_list=[\"test\", \"test2\", \"test3\"],\n sum_total=7\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 4,\n \"test2\": 4,\n \"test3\": -3\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column_list": { - "title": "Column List", - "description": "Set of columns to be checked.", - "type": "array", - "items": { - "type": "string" - } - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "ignore_row_if": { - "title": "Ignore Row If", - "description": "If specified, sets the condition on which a given row is to be ignored.", - "default": "all_values_are_missing", - "anyOf": [ - { - "enum": [ - "all_values_are_missing", - "any_value_is_missing", - "never" - ], - "type": "string" - }, - { - "type": "object" - } - ] - }, - "sum_total": { - "title": "Sum Total", - "description": "Expected sum of columns", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation 
Class", - "type": "string", - "const": "ExpectMulticolumnSumToEqual" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_multicolumn_sum_to_equal" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "multicolumn", - "description": "Multicolumn Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Numeric" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "multi-column expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect that the sum of row values in a specified column list is the same for each row, and equal to a specified sum total." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column_list", - "sum_total" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" 
- }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": 
"nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectQueryResultsToMatchComparison.json 
b/great_expectations/expectations/core/schemas/ExpectQueryResultsToMatchComparison.json deleted file mode 100644 index 19d92630d381..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectQueryResultsToMatchComparison.json +++ /dev/null @@ -1,262 +0,0 @@ -{ - "title": "Expect query results to match comparison", - "description": "This Expectation will check if the results of a query matches the results of a query against another Data Source.\n\nExpectQueryResultsToMatchComparison executes one SQL query for each of two Data Sources and compares their results. It validates that the results from the current Data Source's query matches those from the comparison Data Source's query, above a specified threshold.\n\nEach record returned by the 'base_query' will be compared to each record returned by the 'comparison_query'.\n\nThe maximum number of records that will be returned for comparison from each query is 200.\n\nThe order of records returned does not matter unless the number of records returned would be greater than 200.\n\nColumn names do not matter, but the order of the columns does.\n\nMatch percentage (100% - unexpected percent) is compared to the mostly threshold to determine pass/fail.\n e.g.\nunexpected percent = 10%, mostly = 80%, (100% - 10%) > 80% - pass\nunexpected percent = 10%, mostly = 91%, (100% - 10%) < 91% - fail\n\n\nThe match percentage is computed by dividing the number of matching records by the maximum number of records in either the comparison result or the base result.\n e.g.\nComparison Row Count: 100 Base Row Count: 100 Matches: 100 Match Percentage: 100%\nComparison Row Count: 25 Base Row Count: 100 Matches: 25 Match Percentage: 25%\nComparison Row Count: 100 Base Row Count: 25 Matches: 1 Match Percentage: 1%\n\nIf both the base and comparison queries return 0 records, it is considered a successful result.\n\n\nArgs:\n base_query (str): A SQL query to be executed for this Data Asset.\n comparison_data_source_name (str): The 
name of the comparison Data Source to compare this Asset against.\n comparison_query (str): A SQL query to be executed for the comparison Data Source.\n mostly (float): Successful if at least `mostly` fraction of values match the Expectation.\n\nOther Parameters:\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\nSupported Data Sources:\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\nData Quality Issues:\n Multi-source", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": 
"#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "base_query": { - "title": "Base Query", - "description": "A SQL query to be executed for this Data Asset.", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "comparison_data_source_name": { - "title": "Comparison Data Source Name", - "description": "The name of the comparison Data Source to compare this Asset against.", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "comparison_query": { - "title": "Comparison Query", - "description": "A SQL query to be executed for the comparison Data Source.", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": 
"ExpectQueryResultsToMatchComparison" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_query_results_to_match_comparison" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Multi-source" - ] - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "This Expectation will check if the results of a query matches the results of a query against another Data Source." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "Snowflake", - "Databricks (SQL)", - "Redshift", - "SQLite" - ] - } - } - } - }, - "required": [ - "base_query", - "comparison_data_source_name", - "comparison_query" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": 
"string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json b/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json deleted file mode 100644 index 5cdfe472ec0d..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json +++ /dev/null @@ -1,462 +0,0 @@ -{ - "title": "Expect select column values to be unique within record", - "description": "Expect the values for each record to be unique across the columns listed. Note that records can be duplicated.\n\nExpectSelectColumnValuesToBeUniqueWithinRecord is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): The column names to evaluate.\n\nOther Parameters:\n ignore_row_if (str): \"all_values_are_missing\", \"any_value_is_missing\", \"never\" If specified, sets the condition on which a given row is to be ignored. Default \"never\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nFor example:\n::\n\n A B C\n 1 1 2 Fail\n 1 2 3 Pass\n 8 2 7 Pass\n 1 2 3 Pass\n 4 4 4 Fail\nExample Data:\n test test2 test3\n 0 1 1 2\n 1 1 2 3\n 2 8 2 7\n\nCode Examples:\n Passing Case:\n Input:\n ExpectSelectColumnValuesToBeUniqueWithinRecord(\n column_list=[\"test\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectSelectColumnValuesToBeUniqueWithinRecord(\n column_list=[\"test\", \"test2\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 1,\n \"test2\": 1,\n \"test3\": 2\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - 
"properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column_list": { - "title": "Column List", - "description": "The column names to evaluate.", - "type": "array", - "items": { - "type": "string" - } - }, - "mostly": { - "title": "Mostly", - "description": "Successful if at least `mostly` fraction of values match the Expectation.", - "default": 1, - "anyOf": [ - { - "type": "number", - "minimum": 0.0, - "maximum": 1.0 - }, - { - "type": "object" - } - ], - "multipleOf": 0.01 - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": 
"#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "ignore_row_if": { - "title": "Ignore Row If", - "description": "If specified, sets the condition on which a given row is to be ignored.", - "default": "all_values_are_missing", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectSelectColumnValuesToBeUniqueWithinRecord" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_select_column_values_to_be_unique_within_record" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "multicolumn", - "description": "Multicolumn Map" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Uniqueness" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "multi-column expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the values for each record to be unique across the columns listed. Note that records can be duplicated." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "column_list" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - 
} - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": 
"Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json deleted file mode 100644 index 6864ee09ffc2..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json +++ /dev/null @@ -1,267 +0,0 @@ -{ - "title": "Expect table column count to be between", - "description": "Expect the number of columns in a table to be between two values.\n\nExpectTableColumnCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of columns, inclusive.\n max_value (int or None): The maximum number of columns, inclusive.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable columns has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable columns has no maximum.\n\nSee Also:\n [ExpectTableColumnCountToEqual](https://greatexpectations.io/expectations/expect_table_column_count_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora 
PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnCountToBeBetween(\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableColumnCountToBeBetween(\n min_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": 
"#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum number of columns, inclusive.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum number of columns, inclusive.", - "anyOf": [ - { - "type": "number" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectTableColumnCountToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_table_column_count_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - 
"const": [ - "Schema" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "table expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the number of columns in a table to be between two values." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": 
"integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json deleted file mode 100644 index 00ed17f3ed42..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json +++ /dev/null @@ -1,242 +0,0 @@ -{ - "title": "Expect table column count to equal", - "description": "Expect the number of columns in a table to equal a value.\n\nExpectTableColumnCountToEqual is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n value (int): The expected number of columns.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableColumnCountToBeBetween](https://greatexpectations.io/expectations/expect_table_column_count_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnCountToEqual(\n value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n 
\"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": true,\n \"result\": {\n \"observed_value\": 2\n }\n }\n\n Failing Case:\n Input:\n ExpectTableColumnCountToEqual(\n value=1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": false,\n \"result\": {\n \"observed_value\": 2\n }\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "value": { - "title": "Value", - "description": "The expected number of columns.", - "anyOf": [ - { - "type": "integer" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectTableColumnCountToEqual" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_table_column_count_to_equal" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Schema" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "table expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the number of columns in a table to equal a value." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "value" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json deleted file 
mode 100644 index 711f4ee0b9ad..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json +++ /dev/null @@ -1,245 +0,0 @@ -{ - "title": "Expect table columns to match ordered list", - "description": "Expect the columns in a table to exactly match a specified list.\n\nExpectTableColumnsToMatchOrderedList is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column_list (list of str): The column names, in the correct order.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnsToMatchOrderedList(\n column_list=[\"test\", \"test2\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n \"test2\"\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing 
Case:\n Input:\n ExpectTableColumnsToMatchOrderedList(\n column_list=[\"test2\", \"test\", \"test3\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"Unnamed: 0\",\n \"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": [\n {\n \"Expected Column Position\": 1,\n \"Expected\": \"test2\",\n \"Found\": \"test\"\n },\n {\n \"Expected Column Position\": 2,\n \"Expected\": \"test\",\n \"Found\": \"test2\"\n },\n {\n \"Expected Column Position\": 3,\n \"Expected\": \"test3\",\n \"Found\": null\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column_list": { - "title": "Column List", - "description": "The column names, in the correct order.", - "anyOf": [ - { - "type": "array", - "items": {} - }, - { - "type": "array", - "items": {}, - "uniqueItems": true - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectTableColumnsToMatchOrderedList" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_table_columns_to_match_ordered_list" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Schema" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "table expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the columns in a table to exactly match a specified list." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json deleted file mode 100644 index 0a950321cc6a..000000000000 --- 
a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json +++ /dev/null @@ -1,258 +0,0 @@ -{ - "title": "Expect table columns to match set", - "description": "Expect the columns in a table to match an unordered set.\n\nExpectTableColumnsToMatchSet is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column_set (list of str): The column names, in any order. In SQL datasources, if the column names are double quoted, for example '\"column_name\"', a case sensitive match is done. Otherwise a case insensitive match is done.\n exact_match (boolean): If True, the list of columns must exactly match the observed columns. If False, observed columns must include column_set but additional columns will pass. Default True.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnsToMatchSet(\n column_set=[\"test\"],\n exact_match=False\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 
\"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": {\n \"unexpected\": [\n \"test2\"\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableColumnsToMatchSet(\n column_set=[\"test2\", \"test3\"],\n exact_match=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": {\n \"unexpected\": [\n \"test\"\n ],\n \"missing\": [\n \"test3\"\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "column_set": { - "title": "Column Set", - "description": "The column names, in any order. In SQL datasources, if the column names are double quoted, for example '\"column_name\"', a case sensitive match is done. Otherwise a case insensitive match is done.", - "anyOf": [ - { - "type": "array", - "items": {} - }, - { - "type": "array", - "items": {}, - "uniqueItems": true - }, - { - "type": "object" - } - ] - }, - "exact_match": { - "title": "Exact Match", - "description": "If True, the list of columns must exactly match the observed columns. If False, observed columns must include column_set but additional columns will pass.", - "default": true, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectTableColumnsToMatchSet" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_table_columns_to_match_set" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Schema" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "table expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": 
true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the columns in a table to match an unordered set." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - } - } -} diff --git 
a/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json deleted file mode 100644 index fcdfaaa3d77d..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json +++ /dev/null @@ -1,480 +0,0 @@ -{ - "title": "Expect table row count to be between", - "description": "Expect the number of rows to be between two values.\n\nExpectTableRowCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of rows, inclusive.\n max_value (int or None): The maximum number of rows, inclusive.\n strict_min (boolean): If True, the row count must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the row count must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality 
Issues:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n min_value=1,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "min_value": { - "title": "Min Value", - "description": "The minimum number of rows, inclusive.", - "anyOf": [ - { - "type": "integer" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "max_value": { - "title": "Max Value", - "description": "The maximum number of rows, inclusive.", - "anyOf": [ - { - "type": "integer" - }, - { - "type": "object" - }, - { - "type": "string", - "format": "date-time" - } - ] - }, - "strict_min": { - "title": "Strict Min", - "description": "If True, the row count must be strictly smaller than max_value, default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "strict_max": { - "title": "Strict Max", - "description": "If True, the row count must be strictly larger than min_value, default=False", - "default": false, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "object" - } - ] - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": 
"Expectation Class", - "type": "string", - "const": "ExpectTableRowCountToBeBetween" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_table_row_count_to_be_between" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Volume" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "table expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the number of rows to be between two values." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - 
"additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { 
- "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json deleted file mode 100644 index 4099df3ee2ab..000000000000 --- 
a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json +++ /dev/null @@ -1,437 +0,0 @@ -{ - "title": "Expect table row count to equal", - "description": "Expect the number of rows to equal a value.\n\nExpectTableRowCountToEqual is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n value (int): The expected number of rows.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToEqual(\n value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 
3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToEqual(\n value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "value": { - "title": "Value", - "description": "The expected number of rows.", - "anyOf": [ - { - "type": "integer" - }, - { - "type": "object" - } - ] - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectTableRowCountToEqual" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_table_row_count_to_equal" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Volume" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": "production", - "tags": [ - "core expectation", - "table expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - 
"title": "Short Description", - "type": "string", - "const": "Expect the number of rows to equal a value." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "MySQL", - "SQL Server", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "Redshift" - ] - } - } - } - }, - "required": [ - "value" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition 
statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR 
condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json deleted file mode 100644 index 5a66ca68c5d7..000000000000 --- a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json +++ /dev/null @@ -1,435 +0,0 @@ -{ - "title": "Expect table row count to equal other table", - "description": "Expect the number of rows to equal the number in another table within the same database.\n\nExpectTableRowCountToEqualOtherTable is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n other_table_name (str): The name of the other table. Other table must be located within the same database.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between)\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Volume\n\nExample Data:\n test_table\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\n test_table_two\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\n test_table_three\n test test2\n 0 1.00 2\n 1 2.30 5\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToEqualOtherTable(\n other_table_name=test_table_two\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToEqualOtherTable(\n other_table_name=test_table_three\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": "Rendered Content", 
- "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "other_table_name": { - "title": "Other Table Name", - "description": "The name of the other table. Other table must be located within the same database.", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "row_condition": { - "title": "Row Condition", - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/ComparisonCondition" - }, - { - "$ref": "#/definitions/NullityCondition" - }, - { - "$ref": "#/definitions/AndCondition" - }, - { - "$ref": "#/definitions/OrCondition" - }, - { - "$ref": "#/definitions/PassThroughCondition" - } - ] - }, - "condition_parser": { - "title": "Condition Parser", - "enum": [ - "great_expectations", - "great_expectations__experimental__", - "pandas", - "spark" - ], - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "ExpectTableRowCountToEqualOtherTable" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "expect_table_row_count_to_equal_other_table" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "Volume" - ] - }, - "library_metadata": { - "title": "Library Metadata", - "type": "object", - "const": { - "maturity": 
"production", - "tags": [ - "core expectation", - "table expectation", - "multi-table expectation" - ], - "contributors": [ - "@great_expectations" - ], - "requirements": [], - "has_full_test_suite": true, - "manually_reviewed_code": true - } - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "Expect the number of rows to equal the number in another table within the same database." - }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "SQLite", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "Redshift", - "MySQL", - "SQL Server", - "Databricks (SQL)", - "Snowflake" - ] - } - } - } - }, - "required": [ - "other_table_name" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { 
- "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - }, - "Column": { - "title": "Column", - "description": "--Public API--\nSpecify the column in a condition statement.", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "Operator": { - "title": "Operator", - "description": "An enumeration.", - "enum": [ - "==", - "!=", - "<", - "<=", - ">", - ">=", - "IN", - "NOT_IN" - ], - "type": "string" - }, - "ComparisonCondition": { - "title": "ComparisonCondition", - "description": "--Public API--Condition representing the comparison of a column with a parameter.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "comparison", - "enum": [ - "comparison" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "operator": { - "$ref": "#/definitions/Operator" - }, - "parameter": { - "title": "Parameter" - } - }, - "required": [ - "column", - "operator", - "parameter" - ] - }, - "NullityCondition": { - "title": "NullityCondition", - "description": "--Public API--Condition representing the whether or not a column is null.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "nullity", - "enum": [ - "nullity" - ], - "type": "string" - }, - "column": { - "$ref": "#/definitions/Column" - }, - "is_null": { - "title": "Is Null", - "type": "boolean" - } - }, - "required": [ - "column", - "is_null" - ] - }, - "Condition": { - "title": "Condition", - "description": "Base class for conditions.", - "type": "object", - "properties": {} - }, - "AndCondition": { - "title": "AndCondition", - "description": "--Public API--Represents an AND condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "and", - "enum": [ - "and" 
- ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "OrCondition": { - "title": "OrCondition", - "description": "--Public API--Represents an OR condition composed of multiple conditions.", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "or", - "enum": [ - "or" - ], - "type": "string" - }, - "conditions": { - "title": "Conditions", - "type": "array", - "items": { - "$ref": "#/definitions/Condition" - } - } - }, - "required": [ - "conditions" - ] - }, - "PassThroughCondition": { - "title": "PassThroughCondition", - "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", - "type": "object", - "properties": { - "type": { - "title": "Type", - "default": "pass_through", - "enum": [ - "pass_through" - ], - "type": "string" - }, - "pass_through_filter": { - "title": "Pass Through Filter", - "type": "string" - } - }, - "required": [ - "pass_through_filter" - ] - } - } -} diff --git a/great_expectations/expectations/core/schemas/README.md b/great_expectations/expectations/core/schemas/README.md deleted file mode 100644 index 47746ac1fbbb..000000000000 --- a/great_expectations/expectations/core/schemas/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# Expectation JSON Schemas - -## Specification -Expectation JSON schemas should conform to the [JsonSchema7 interface](https://jsonforms.io/api/core/interfaces/jsonschema7). We ensure this by validating each schema using the python [jsonschema](https://python-jsonschema.readthedocs.io/en/stable/) library (e.g. using `Draft7Validator.check_schema()`). 
- -## Metadata Property -Properties on the Expectation schemas represent class instance variable definitions except for one special property: `metadata` - -The `metadata` property is itself an `object` containing many `properties`. The `metadata` `properties` are all defined by a `const` which does not change from one Expectation instance to another. diff --git a/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json b/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json deleted file mode 100644 index d8d4b0da6e86..000000000000 --- a/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json +++ /dev/null @@ -1,223 +0,0 @@ -{ - "title": "Custom Expectation with SQL", - "description": "This Expectation will fail validation if the query returns one or more rows. The WHERE clause defines the fail criteria.\n\nUnexpectedRowsExpectations facilitate the execution of SQL or Spark-SQL queries as the core logic for an Expectation. UnexpectedRowsExpectations must implement a `_validate(...)` method containing logic for determining whether data returned by the executed query is successfully validated. One is written by default, but can be overridden.\n\nA successful validation is one where the unexpected_rows_query returns no rows.\n\nUnexpectedRowsExpectation is a [Batch Expectation](https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_batch_expectations).\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n unexpected_rows_query (str): A SQL or Spark-SQL query to be executed for validation.\n\nOther Parameters:\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\nSupported Data Sources:\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\nData Quality Issues:\n SQL", - "type": "object", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "meta": { - "title": "Meta", - "type": "object" - }, - "notes": { - "title": "Notes", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "result_format": { - "title": "Result Format", - "default": "BASIC", - "anyOf": [ - { - "$ref": "#/definitions/ResultFormat" - }, - { - "type": "object" - } - ] - }, - "description": { - "title": "Description", - "description": "A short description of your Expectation", - "type": "string" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": false, - "type": "boolean" - }, - "rendered_content": { - "title": 
"Rendered Content", - "type": "array", - "items": { - "type": "object" - } - }, - "severity": { - "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", - "default": "critical", - "allOf": [ - { - "$ref": "#/definitions/FailureSeverity" - } - ] - }, - "windows": { - "title": "Windows", - "description": "Definition(s) for evaluation of temporal windows", - "type": "array", - "items": { - "$ref": "#/definitions/Window" - } - }, - "batch_id": { - "title": "Batch Id", - "type": "string" - }, - "unexpected_rows_query": { - "title": "Unexpected Rows Query", - "description": "A SQL or Spark-SQL query to be executed for validation.", - "anyOf": [ - { - "type": "string" - }, - { - "type": "object" - } - ] - }, - "metadata": { - "type": "object", - "properties": { - "expectation_class": { - "title": "Expectation Class", - "type": "string", - "const": "UnexpectedRowsExpectation" - }, - "expectation_type": { - "title": "Expectation Type", - "type": "string", - "const": "unexpected_rows_expectation" - }, - "domain_type": { - "title": "Domain Type", - "type": "string", - "const": "table", - "description": "Batch" - }, - "data_quality_issues": { - "title": "Data Quality Issues", - "type": "array", - "const": [ - "SQL" - ] - }, - "short_description": { - "title": "Short Description", - "type": "string", - "const": "This Expectation will fail validation if the query returns one or more rows. The WHERE clause defines the fail criteria." 
- }, - "supported_data_sources": { - "title": "Supported Data Sources", - "type": "array", - "const": [ - "Spark", - "PostgreSQL", - "Amazon Aurora PostgreSQL", - "Citus", - "AlloyDB", - "Neon", - "Redshift", - "MySQL", - "BigQuery", - "Snowflake", - "Databricks (SQL)", - "SQL Server" - ] - } - } - } - }, - "required": [ - "unexpected_rows_query" - ], - "additionalProperties": false, - "definitions": { - "ResultFormat": { - "title": "ResultFormat", - "description": "An enumeration.", - "enum": [ - "BOOLEAN_ONLY", - "BASIC", - "COMPLETE", - "SUMMARY" - ], - "type": "string" - }, - "FailureSeverity": { - "title": "FailureSeverity", - "description": "Severity levels for Expectation failures.", - "enum": [ - "critical", - "warning", - "info" - ], - "type": "string" - }, - "Offset": { - "title": "Offset", - "description": "A threshold in which a metric will be considered passable", - "type": "object", - "properties": { - "positive": { - "title": "Positive", - "type": "number" - }, - "negative": { - "title": "Negative", - "type": "number" - } - }, - "required": [ - "positive", - "negative" - ], - "additionalProperties": false - }, - "Window": { - "title": "Window", - "description": "A definition for a temporal window across <`range`> number of previous invocations", - "type": "object", - "properties": { - "constraint_fn": { - "title": "Constraint Fn", - "type": "string" - }, - "parameter_name": { - "title": "Parameter Name", - "type": "string" - }, - "range": { - "title": "Range", - "type": "integer" - }, - "offset": { - "$ref": "#/definitions/Offset" - }, - "strict": { - "title": "Strict", - "default": false, - "type": "boolean" - } - }, - "required": [ - "constraint_fn", - "parameter_name", - "range", - "offset" - ], - "additionalProperties": false - } - } -} diff --git a/great_expectations/expectations/core/schemas/__init__.py b/great_expectations/expectations/core/schemas/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git 
a/great_expectations/expectations/metrics/column_aggregate_metrics/__init__.py b/great_expectations/expectations/metrics/column_aggregate_metrics/__init__.py index 6cfc1032ba38..7f615ec995ca 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/__init__.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/__init__.py @@ -19,9 +19,6 @@ from .column_min import ColumnMin from .column_most_common_value import ColumnMostCommonValue from .column_non_null_count import ColumnNonNullCount -from .column_parameterized_distribution_ks_test_p_value import ( - ColumnParameterizedDistributionKSTestPValue, -) from .column_partition import ColumnPartition from .column_proportion_of_non_null_values import ColumnNonNullProportion from .column_proportion_of_unique_values import ColumnUniqueProportion diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py deleted file mode 100644 index ad4626af6d34..000000000000 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Final - -from great_expectations.execution_engine import PandasExecutionEngine -from great_expectations.expectations.metrics.column_aggregate_metric_provider import ( - ColumnAggregateMetricProvider, - column_aggregate_value, -) -from great_expectations.expectations.metrics.util import ( - is_valid_continuous_partition_object, -) - -logger = logging.getLogger(__name__) - - -import numpy as np -from scipy import stats - -NP_RANDOM_GENERATOR: Final = np.random.default_rng() - - -class ColumnBootstrappedKSTestPValue(ColumnAggregateMetricProvider): - """MetricProvider Class for Aggregate Standard Deviation metric""" - - metric_name = 
"column.bootstrapped_ks_test_p_value" - value_keys = ("partition_object", "p", "bootstrap_sample", "bootstrap_sample_size") - - @column_aggregate_value(engine=PandasExecutionEngine) - def _pandas( # noqa: C901 # FIXME CoP - cls, - column, - partition_object=None, - p=0.05, - bootstrap_samples=None, - bootstrap_sample_size=None, - **kwargs, - ): - if not is_valid_continuous_partition_object(partition_object): - raise ValueError("Invalid continuous partition object.") # noqa: TRY003 # FIXME CoP - - # TODO: consider changing this into a check that tail_weights does not exist exclusively, by moving this check into is_valid_continuous_partition_object # noqa: E501 # FIXME CoP - if (partition_object["bins"][0] == -np.inf) or (partition_object["bins"][-1] == np.inf): - raise ValueError("Partition endpoints must be finite.") # noqa: TRY003 # FIXME CoP - - if "tail_weights" in partition_object and np.sum(partition_object["tail_weights"]) > 0: - raise ValueError("Partition cannot have tail weights -- endpoints must be finite.") # noqa: TRY003 # FIXME CoP - - test_cdf = np.append(np.array([0]), np.cumsum(partition_object["weights"])) - - def estimated_cdf(x): - return np.interp(x, partition_object["bins"], test_cdf) - - if bootstrap_samples is None: - bootstrap_samples = 1000 - - if bootstrap_sample_size is None: - # Sampling too many elements (or not bootstrapping) will make the test too sensitive to the fact that we've # noqa: E501 # FIXME CoP - # compressed via a partition. - - # Sampling too few elements will make the test insensitive to significant differences, especially # noqa: E501 # FIXME CoP - # for nonoverlapping ranges. 
- bootstrap_sample_size = len(partition_object["weights"]) * 2 - - results = [ - stats.kstest( - NP_RANDOM_GENERATOR.choice(column, size=bootstrap_sample_size), - estimated_cdf, - )[1] - for _ in range(bootstrap_samples) - ] - - test_result = (1 + sum(x >= p for x in results)) / (bootstrap_samples + 1) - - hist, _bin_edges = np.histogram(column, partition_object["bins"]) - below_partition = len(np.where(column < partition_object["bins"][0])[0]) - above_partition = len(np.where(column > partition_object["bins"][-1])[0]) - - # Expand observed partition to report, if necessary - if below_partition > 0 and above_partition > 0: - observed_bins = [np.min(column)] + partition_object["bins"] + [np.max(column)] - observed_weights = np.concatenate(([below_partition], hist, [above_partition])) / len( - column - ) - elif below_partition > 0: - observed_bins = [np.min(column)] + partition_object["bins"] - observed_weights = np.concatenate(([below_partition], hist)) / len(column) - elif above_partition > 0: - observed_bins = partition_object["bins"] + [np.max(column)] - observed_weights = np.concatenate((hist, [above_partition])) / len(column) - else: - observed_bins = partition_object["bins"] - observed_weights = hist / len(column) - - observed_cdf_values = np.cumsum(observed_weights) - - # TODO: How should this metric's return_obj be structured? 
- return_obj = { - "observed_value": test_result, - "details": { - "bootstrap_samples": bootstrap_samples, - "bootstrap_sample_size": bootstrap_sample_size, - "observed_partition": { - "bins": observed_bins, - "weights": observed_weights.tolist(), - }, - "expected_partition": { - "bins": partition_object["bins"], - "weights": partition_object["weights"], - }, - "observed_cdf": { - "x": observed_bins, - "cdf_values": [0] + observed_cdf_values.tolist(), - }, - "expected_cdf": { - "x": partition_object["bins"], - "cdf_values": test_cdf.tolist(), - }, - }, - } - - return return_obj diff --git a/great_expectations/self_check/util.py b/great_expectations/self_check/util.py index 34356966cc3d..dbe82b783020 100644 --- a/great_expectations/self_check/util.py +++ b/great_expectations/self_check/util.py @@ -1136,14 +1136,11 @@ def candidate_test_is_on_temporary_notimplemented_list_v2_api(context, expectati "expect_column_values_to_match_json_schema", "expect_column_stdev_to_be_between", "expect_column_most_common_value_to_be_in_set", - "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", - "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", "expect_column_pair_values_to_be_equal", "expect_column_pair_values_a_to_be_greater_than_b", "expect_select_column_values_to_be_unique_within_record", "expect_compound_columns_to_be_unique", "expect_multicolumn_values_to_be_unique", - "expect_column_pair_cramers_phi_value_to_be_less_than", "expect_multicolumn_sum_to_equal", "expect_column_value_z_scores_to_be_less_than", ] @@ -1155,9 +1152,6 @@ def candidate_test_is_on_temporary_notimplemented_list_v2_api(context, expectati expectations_not_implemented_v2_sql.append( "expect_column_kl_divergence_to_be_less_than" ) # TODO: unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 - expectations_not_implemented_v2_sql.append( - "expect_column_chisquare_test_p_value_to_be_greater_than" - ) # TODO: unique to bigquery -- 
https://github.com/great-expectations/great_expectations/issues/3261 expectations_not_implemented_v2_sql.append( "expect_column_values_to_be_between" ) # TODO: error unique to bigquery -- https://github.com/great-expectations/great_expectations/issues/3261 @@ -1182,10 +1176,7 @@ def candidate_test_is_on_temporary_notimplemented_list_v2_api(context, expectati return expectation_type in [ "expect_column_values_to_be_dateutil_parseable", "expect_column_values_to_be_json_parseable", - "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", - "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", "expect_compound_columns_to_be_unique", - "expect_column_pair_cramers_phi_value_to_be_less_than", "expect_table_row_count_to_equal_other_table", "expect_column_value_z_scores_to_be_less_than", ] From 7797520f98985a80cc39533773182252548fd529 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 2 Apr 2026 20:15:26 +0200 Subject: [PATCH 2/3] [MAINTENANCE] Fix CI: remove dead schema tests referencing deleted schemas directory --- .../core/test_core_model_schemas.py | 49 ------------------- 1 file changed, 49 deletions(-) diff --git a/tests/expectations/core/test_core_model_schemas.py b/tests/expectations/core/test_core_model_schemas.py index e7441e6f0af2..1a2a160c3f51 100644 --- a/tests/expectations/core/test_core_model_schemas.py +++ b/tests/expectations/core/test_core_model_schemas.py @@ -1,29 +1,11 @@ -import json -from pathlib import Path - -import jsonschema import pytest -from jsonschema import Draft7Validator from great_expectations.expectations import core -from great_expectations.expectations.core import schemas from great_expectations.expectations.expectation import MetaExpectation expectation_dictionary = dict(core.__dict__) -@pytest.fixture -def safer_draft_7_validator() -> type[Draft7Validator]: - validator = Draft7Validator - validator.META_SCHEMA = { - 
**Draft7Validator.META_SCHEMA, - # this ensures that only specified properties are used (e.g. multipleOf, not multiple_of) - # otherwise, the spec says unspecified properties should be ignored - "additionalProperties": False, - } - return validator - - @pytest.mark.unit def test_all_core_model_schemas_are_serializable(): all_models = [ @@ -35,34 +17,3 @@ def test_all_core_model_schemas_are_serializable(): assert len(all_models) > 50 for model in all_models: model.schema_json() - - -@pytest.mark.filesystem # ~4s -def test_schemas_updated(): - all_models = { - cls_name: expectation - for cls_name, expectation in expectation_dictionary.items() - if isinstance(expectation, MetaExpectation) - } - schema_file_paths = Path(schemas.__file__).parent.glob("*.json") - all_schemas = {file_path.stem: file_path.read_text() for file_path in schema_file_paths} - for cls_name, schema in all_schemas.items(): - # converting to dicts for easier comparision on failure - new_schema = json.loads(all_models[cls_name].schema_json()) - old_schema = json.loads(schema) - assert new_schema == old_schema, "json schemas not updated, run `invoke schemas --sync`" - - -@pytest.mark.unit -def test_schemas_valid_spec(safer_draft_7_validator: type[Draft7Validator]): - # https://json-schema.org/draft-07 - # https://jsonforms.io/api/core/interfaces/jsonschema7 - schema_file_paths = Path(schemas.__file__).parent.glob("*.json") - for file_path in schema_file_paths: - with open(file_path) as schema_file: - try: - safer_draft_7_validator.check_schema(json.load(schema_file)) - except jsonschema.exceptions.SchemaError as e: - raise AssertionError( - f"Invalid json schema for `{file_path.name}`: {e.message}" - ) from e From 00969ffc8325b3eba2ee83b5dd22957b35c11b5c Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Fri, 3 Apr 2026 09:53:20 +0200 Subject: [PATCH 3/3] [MAINTENANCE] Restore schemas directory and associated tests The schemas directory alerts 
developers to schema changes created by updating Expectations (via 'invoke schemas --sync'). It was incorrectly flagged as dead code because nothing imports it at runtime. --- .../ExpectColumnDistinctValuesToBeInSet.json | 503 +++++++++++++++++ ...xpectColumnDistinctValuesToContainSet.json | 503 +++++++++++++++++ .../ExpectColumnDistinctValuesToEqualSet.json | 503 +++++++++++++++++ .../ExpectColumnKLDivergenceToBeLessThan.json | 531 ++++++++++++++++++ .../schemas/ExpectColumnMaxToBeBetween.json | 497 ++++++++++++++++ .../schemas/ExpectColumnMeanToBeBetween.json | 497 ++++++++++++++++ .../ExpectColumnMedianToBeBetween.json | 497 ++++++++++++++++ .../schemas/ExpectColumnMinToBeBetween.json | 497 ++++++++++++++++ .../ExpectColumnMostCommonValueToBeInSet.json | 515 +++++++++++++++++ ...pectColumnPairValuesAToBeGreaterThanB.json | 483 ++++++++++++++++ .../ExpectColumnPairValuesToBeEqual.json | 472 ++++++++++++++++ .../ExpectColumnPairValuesToBeInSet.json | 491 ++++++++++++++++ ...nProportionOfNonNullValuesToBeBetween.json | 484 ++++++++++++++++ ...mnProportionOfUniqueValuesToBeBetween.json | 498 ++++++++++++++++ ...ExpectColumnQuantileValuesToBeBetween.json | 493 ++++++++++++++++ .../schemas/ExpectColumnStdevToBeBetween.json | 481 ++++++++++++++++ .../schemas/ExpectColumnSumToBeBetween.json | 497 ++++++++++++++++ .../core/schemas/ExpectColumnToExist.json | 248 ++++++++ ...pectColumnUniqueValueCountToBeBetween.json | 498 ++++++++++++++++ .../ExpectColumnValueLengthsToBeBetween.json | 497 ++++++++++++++++ .../ExpectColumnValueLengthsToEqual.json | 460 +++++++++++++++ .../ExpectColumnValueZScoresToBeLessThan.json | 473 ++++++++++++++++ .../ExpectColumnValuesToBeBetween.json | 513 +++++++++++++++++ .../schemas/ExpectColumnValuesToBeInSet.json | 519 +++++++++++++++++ .../ExpectColumnValuesToBeInTypeList.json | 458 +++++++++++++++ .../schemas/ExpectColumnValuesToBeNull.json | 447 +++++++++++++++ .../schemas/ExpectColumnValuesToBeOfType.json | 460 +++++++++++++++ 
.../schemas/ExpectColumnValuesToBeUnique.json | 447 +++++++++++++++ .../ExpectColumnValuesToMatchLikePattern.json | 458 +++++++++++++++ ...ectColumnValuesToMatchLikePatternList.json | 478 ++++++++++++++++ .../ExpectColumnValuesToMatchRegex.json | 458 +++++++++++++++ .../ExpectColumnValuesToMatchRegexList.json | 478 ++++++++++++++++ .../ExpectColumnValuesToNotBeInSet.json | 519 +++++++++++++++++ .../ExpectColumnValuesToNotBeNull.json | 447 +++++++++++++++ ...pectColumnValuesToNotMatchLikePattern.json | 457 +++++++++++++++ ...ColumnValuesToNotMatchLikePatternList.json | 460 +++++++++++++++ .../ExpectColumnValuesToNotMatchRegex.json | 457 +++++++++++++++ ...ExpectColumnValuesToNotMatchRegexList.json | 460 +++++++++++++++ .../ExpectCompoundColumnsToBeUnique.json | 459 +++++++++++++++ .../schemas/ExpectMulticolumnSumToEqual.json | 480 ++++++++++++++++ .../ExpectQueryResultsToMatchComparison.json | 262 +++++++++ ...ectColumnValuesToBeUniqueWithinRecord.json | 462 +++++++++++++++ .../ExpectTableColumnCountToBeBetween.json | 267 +++++++++ .../ExpectTableColumnCountToEqual.json | 242 ++++++++ .../ExpectTableColumnsToMatchOrderedList.json | 245 ++++++++ .../schemas/ExpectTableColumnsToMatchSet.json | 258 +++++++++ .../ExpectTableRowCountToBeBetween.json | 480 ++++++++++++++++ .../schemas/ExpectTableRowCountToEqual.json | 437 ++++++++++++++ .../ExpectTableRowCountToEqualOtherTable.json | 435 ++++++++++++++ .../expectations/core/schemas/README.md | 9 + .../schemas/UnexpectedRowsExpectation.json | 223 ++++++++ .../expectations/core/schemas/__init__.py | 0 .../core/test_core_model_schemas.py | 49 ++ 53 files changed, 22442 insertions(+) create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json create mode 100644 
great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnProportionOfNonNullValuesToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnToExist.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json create mode 100644 
great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json create mode 100644 great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json create mode 100644 great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json create mode 100644 great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json create mode 100644 great_expectations/expectations/core/schemas/ExpectQueryResultsToMatchComparison.json create mode 100644 
great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json create mode 100644 great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json create mode 100644 great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json create mode 100644 great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json create mode 100644 great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json create mode 100644 great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json create mode 100644 great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json create mode 100644 great_expectations/expectations/core/schemas/README.md create mode 100644 great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json create mode 100644 great_expectations/expectations/core/schemas/__init__.py diff --git a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json new file mode 100644 index 000000000000..bd16a5da3e8b --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json @@ -0,0 +1,503 @@ +{ + "title": "Expect column distinct values to be in set", + "description": "Expect the set of distinct column values to be contained by a given set.\n\nExpectColumnDistinctValuesToBeInSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column 
name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n The success value for this expectation will match that of [ExpectColumnValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_be_in_set).\n\nSee Also:\n [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set)\n [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToBeInSet(\n column=\"test\",\n value_set=[1, 2, 3, 4, 5]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToBeInSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the 
impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "value_set": { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "anyOf": [ + { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "oneOf": [ + { + "title": "Text", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "examples": [ + [ + "a", + "b", + "c", + "d", + "e" + ], + [ + "2024-01-01", + "2024-01-02", + "2024-01-03", + "2024-01-04", + "2024-01-05" + ] + ] + }, + { + "title": "Numbers", + "type": "array", + "items": { + "type": "number" + }, + "minItems": 1, + "examples": [ + [ + 1, + 2, + 3, + 4, + 5 + ], + [ + 1.1, + 2.2, + 3.3, + 4.4, + 5.5 + ], + [ + 1, + 2.2, + 3, + 4.4, + 5 + ] + ] + } + ] + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": 
"string", + "const": "ExpectColumnDistinctValuesToBeInSet" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_distinct_values_to_be_in_set" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Uniqueness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the set of distinct column values to be contained by a given set." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "value_set" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + 
"type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": 
"string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json 
b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json new file mode 100644 index 000000000000..f9602e8bf38b --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json @@ -0,0 +1,503 @@ +{ + "title": "Expect column distinct values to contain set", + "description": "Expect the set of distinct column values to contain a given set.\n\nExpectColumnDistinctValuesToContainSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set)\n [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToContainSet(\n column=\"test\",\n value_set=[1, 4]\n )\n\n 
Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToContainSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "value_set": { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "anyOf": [ + { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "oneOf": [ + { + "title": "Text", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "examples": [ + [ + "a", + "b", + "c", + "d", + "e" + ], + [ + "2024-01-01", + "2024-01-02", + "2024-01-03", + "2024-01-04", + "2024-01-05" + ] + ] + }, + { + "title": "Numbers", + "type": "array", + "items": { + "type": "number" + }, + "minItems": 1, + "examples": [ + [ + 1, + 2, + 3, + 4, + 5 + ], + [ + 1.1, + 2.2, + 3.3, + 4.4, + 5.5 + ], + [ + 1, + 2.2, + 3, + 4.4, + 5 + ] + ] + } + ] + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": 
"ExpectColumnDistinctValuesToContainSet" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_distinct_values_to_contain_set" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Uniqueness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the set of distinct column values to contain a given set." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "value_set" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + 
"required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + 
"$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json new file mode 100644 index 
000000000000..ae786ff01611 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json @@ -0,0 +1,503 @@ +{ + "title": "Expect column distinct values to equal set", + "description": "Expect the set of distinct column values to equal a given set.\n\nExpectColumnDistinctValuesToEqualSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set)\n [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToEqualSet(\n column=\"test\",\n value_set=[1, 2, 4]\n 
)\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToEqualSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "value_set": { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "anyOf": [ + { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "oneOf": [ + { + "title": "Text", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "examples": [ + [ + "a", + "b", + "c", + "d", + "e" + ], + [ + "2024-01-01", + "2024-01-02", + "2024-01-03", + "2024-01-04", + "2024-01-05" + ] + ] + }, + { + "title": "Numbers", + "type": "array", + "items": { + "type": "number" + }, + "minItems": 1, + "examples": [ + [ + 1, + 2, + 3, + 4, + 5 + ], + [ + 1.1, + 2.2, + 3.3, + 4.4, + 5.5 + ], + [ + 1, + 2.2, + 3, + 4.4, + 5 + ] + ] + } + ] + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": 
"ExpectColumnDistinctValuesToEqualSet" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_distinct_values_to_equal_set" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Uniqueness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the set of distinct column values to equal a given set." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "value_set" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + 
"required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + 
"$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json b/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json new file mode 100644 index 
000000000000..86a5dca97a17 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json @@ -0,0 +1,531 @@ +{ + "title": "Expect column KL divergence to be less than", + "description": "Expect the Kulback-Leibler (KL) divergence (relative entropy) of the specified column with respect to the partition object to be lower than the provided threshold.\n\nKL divergence compares two distributions. The higher the divergence value (relative entropy), the larger the difference between the two distributions. A relative entropy of zero indicates that the data are distributed identically, when binned according to the provided partition.\n\nIn many practical contexts, choosing a value between 0.5 and 1 will provide a useful test.\n\nThis expectation works on both categorical and continuous partitions. See notes below for details.\n\nExpectColumnKlDivergenceToBeLessThan is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n partition_object (dict or None): The expected partition object. See [partition_object](https://docs.greatexpectations.io/docs/reference/expectations/distributional_expectations/#partition-objects).\n threshold (float or None): The maximum KL divergence to for which to return success=True. If KL divergence is largerthan the provided threshold, the test will return success=False.\n internal_weight_holdout (float between 0 and 1 or None): The amount of weight to split uniformly among zero-weighted partition bins. 
internal_weight_holdout provides a mechanisms to make the test less strict by assigning positive weights to values observed in the data for which the partition explicitly expected zero weight. With no internal_weight_holdout, any value observed in\n such a region will cause KL divergence to rise to +Infinity. Defaults to 0.\n tail_weight_holdout (float between 0 and 1 or None): The amount of weight to add to the tails of the histogram.Tail weight holdout is split evenly between (-Infinity, min(partition_object['bins'])) and (max(partition_object['bins']), +Infinity). tail_weight_holdout provides a mechanism to make the test less strict by assigning positive weights to values observed in the data that are not present in the partition. With no tail_weight_holdout, any value observed outside the provided partition_object will cause KL divergence to rise to +Infinity. Defaults to 0.\n bucketize_data (boolean): If True, then continuous data will be bucketized before evaluation. Setting this parameter to false allows evaluation of KL divergence with a None partition object for profiling against discrete data.\n min_value (float or None): The minimum value for the column.\n max_value (float or None): The maximum value for the column.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * observed_value field in the result object is customized for this expectation to be a float representing the true KL divergence (relative entropy) or None if the value is calculated as infinity, -infinity, or NaN\n * details.observed_partition in the result object is customized for this expectation to be a dict representing the partition observed in the data\n * details.expected_partition in the result object is customized for this expectation to be a dict representing the partition against which the data were compared, after applying specified weight holdouts\n\n If the partition_object is categorical, this expectation will expect the values in column to also be categorical.\n\n * If the column includes values that are not present in the partition, the tail_weight_holdout will be equally split among those values, providing a mechanism to weaken the strictness of the expectation (otherwise, relative entropy would immediately go to infinity).\n * If the partition includes values that are not present in the column, the test will simply include zero weight for that value.\n\n If the partition_object is continuous, this expectation will discretize the values in the column according to the bins specified in the partition_object, and apply the test to the resulting distribution.\n\n * The 
internal_weight_holdout and tail_weight_holdout parameters provide a mechanism to weaken the expectation, since an expected weight of zero would drive relative entropy to be infinite if any data are observed in that interval.\n * If internal_weight_holdout is specified, that value will be distributed equally among any intervals with weight zero in the partition_object.\n * If tail_weight_holdout is specified, that value will be appended to the tails of the bins ((-Infinity, min(bins)) and (max(bins), Infinity).\n\n If relative entropy/kl divergence goes to infinity for any of the reasons mentioned above, the observed value will be set to None. This is because inf, -inf, Nan, are not json serializable and cause some json parsers to crash when encountered. The python None token will be serialized to null in json.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test\n 0 \"A\"\n 1 \"A\"\n 2 \"A\"\n 3 \"A\"\n 4 \"A\"\n 5 
\"B\"\n 6 \"B\"\n 7 \"B\"\n 8 \"C\"\n 9 \"C\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnKLDivergenceToBeLessThan(\n column=\"test\",\n partition_object={\"weights\": [0.5, 0.3, 0.2], \"values\": [\"A\", \"B\", \"C\"]},\n threshold=0.1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.0,\n \"details\": {\n \"observed_partition\": {\n \"values\": [\n \"A\",\n \"B\",\n \"C\"\n ],\n \"weights\": [\n 0.5,\n 0.3,\n 0.2\n ]\n },\n \"expected_partition\": {\n \"values\": [\n \"A\",\n \"B\",\n \"C\"\n ],\n \"weights\": [\n 0.5,\n 0.3,\n 0.2\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnKLDivergenceToBeLessThan(\n column=\"test\",\n partition_object={\"weights\": [0.3333333333333333, 0.3333333333333333, 0.3333333333333333], \"values\": [\"A\", \"B\", \"C\"]},\n threshold=0.01\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.06895927460353621,\n \"details\": {\n \"observed_partition\": {\n \"values\": [\n \"A\",\n \"B\",\n \"C\"\n ],\n \"weights\": [\n 0.5,\n 0.3,\n 0.2\n ]\n },\n \"expected_partition\": {\n \"values\": [\n \"A\",\n \"B\",\n \"C\"\n ],\n \"weights\": [\n 0.3333333333333333,\n 0.3333333333333333,\n 0.3333333333333333\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + 
"description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "partition_object": { + "title": "Partition Object", + "description": "The expected partition object.", + "type": "object" + }, + "threshold": { + "title": "Threshold", + "description": "The maximum KL divergence to for which to return success=True. 
If KL divergence is largerthan the provided threshold, the test will return success=False.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "internal_weight_holdout": { + "title": "Internal Weight Holdout", + "description": "The amount of weight to split uniformly among zero-weighted partition bins. internal_weight_holdout provides a mechanisms to make the test less strict by assigning positive weights to values observed in the data for which the partition explicitly expected zero weight.", + "default": 0, + "anyOf": [ + { + "type": "number", + "minimum": 0, + "maximum": 1 + }, + { + "type": "object" + } + ] + }, + "tail_weight_holdout": { + "title": "Tail Weight Holdout", + "description": "The amount of weight to add to the tails of the histogram.Tail weight holdout is split evenly between (-Infinity, min(partition_object['bins'])) and (max(partition_object['bins']), +Infinity). tail_weight_holdout provides a mechanism to make the test less strict by assigning positive weights to values observed in the data that are not present in the partition. With no tail_weight_holdout, any value observed outside the provided partition_object will cause KL divergence to rise to +Infinity. Defaults to 0.", + "default": 0, + "anyOf": [ + { + "type": "number", + "minimum": 0, + "maximum": 1 + }, + { + "type": "object" + } + ] + }, + "bucketize_data": { + "title": "Bucketize Data", + "description": "If True, then continuous data will be bucketized before evaluation. 
Setting this parameter to false allows evaluation of KL divergence with a None partition object for profiling against discrete data.", + "default": true, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "min_value": { + "title": "Min Value", + "description": "The minimum value for the column.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum value for the column.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnKLDivergenceToBeLessThan" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_kl_divergence_to_be_less_than" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation", + "distributional expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the Kulback-Leibler (KL) divergence (relative entropy) of the specified column with respect to the partition object to be lower than the provided threshold." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + 
"name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", 
+ "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json new file mode 100644 index 000000000000..69969f6badeb --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json @@ -0,0 +1,497 @@ +{ + "title": "Expect column maximum to be between", + "description": "Expect the column maximum to be between a minimum value and a maximum value.\n\nExpectColumnMaxToBeBetween is a Column Aggregate Expectation\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimum value of the acceptable range for the column maximum.\n max_value (comparable type or None): The maximum value of the acceptable range for the column maximum.\n strict_min (boolean): If True, the lower 
bound of the column maximum acceptable rangemust be strictly larger than min_value, default=False\n strict_max (boolean): If True, the upper bound of the column maximum acceptable rangemust be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column max\n\nSee Also:\n [ExpectColumnMinToBeBetween](https://greatexpectations.io/expectations/expect_column_min_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks 
(SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMaxToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMaxToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=7,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 7.0\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum value of the acceptable range for the column maximum.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum value of the acceptable range for the column maximum.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the lower bound of the column maximum acceptable rangemust be strictly larger than min_value, default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict 
Max", + "description": "If True, the upper bound of the column maximum acceptable rangemust be strictly smaller than max_value, default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnMaxToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_max_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column maximum to be between a minimum value and a maximum value." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json new file mode 100644 index 000000000000..52a304f90ba5 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json @@ -0,0 +1,497 @@ +{ + "title": "Expect column mean to be between", + "description": "Expect the column mean to be between a minimum value and a maximum value (inclusive).\n\nExpectColumnMeanToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum value for the column mean.\n max_value (float or None): The maximum value for the column mean.\n strict_min (boolean): If True, the column mean must be strictly larger than 
min_value, default=False\n strict_max (boolean): If True, the column mean must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound.\n * If max_value is None, then min_value is treated as a lower bound.\n * observed_value field in the result object is customized for this expectation to be a float representing the true mean for the column\n\nSee Also:\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMeanToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1.275\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMeanToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3.375\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum value for the column mean.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum value for the column mean.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the column mean must be strictly larger than min_value, default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the column mean must be strictly smaller than max_value, 
default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnMeanToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_mean_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column mean to be between a minimum value and a maximum value (inclusive)." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json new file mode 100644 index 000000000000..5fa3a0fec133 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json @@ -0,0 +1,497 @@ +{ + "title": "Expect column median to be between", + "description": "Expect the column median to be between a minimum value and a maximum value.\n\nExpectColumnMedianToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum value for the column median.\n max_value (int or None): The maximum value for the column median.\n strict_min (boolean): If True, the column median must be strictly larger than 
min_value, default=False\n strict_max (boolean): If True, the column median must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the true median for the column\n\nSee Also:\n [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between)\n [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1.15\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test2\",\n min_value=3,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.75\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum value for the column median.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum value for the column median.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the column median must be strictly larger than min_value, default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the column median must be strictly smaller than 
max_value, default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnMedianToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_median_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column median to be between a minimum value and a maximum value." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json new file mode 100644 index 000000000000..82798b9ae7b6 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json @@ -0,0 +1,497 @@ +{ + "title": "Expect column minimum to be between", + "description": "Expect the column minimum to be between a minimum value and a maximum value.\n\nExpectColumnMinToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimal column minimum allowed.\n max_value (comparable type or None): The maximal column minimum allowed.\n strict_min (boolean): If True, the minimal column minimum must be strictly 
larger than min_value, default=False\n strict_max (boolean): If True, the maximal column minimum must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column min\n\nSee Also:\n [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks 
(SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMinToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .8\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=3,\n strict_min=True,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimal column minimum allowed.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximal column minimum allowed.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the minimal column minimum must be strictly larger than min_value, default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the maximal column minimum must be strictly smaller 
than max_value, default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnMinToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_min_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column minimum to be between a minimum value and a maximum value." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json new file mode 100644 index 000000000000..7011f29bdea9 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json @@ -0,0 +1,515 @@ +{ + "title": "Expect column most common value to be in set", + "description": "Expect the most common value to be within the designated value set.\n\nExpectColumnMostCommonValueToBeInSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A list of potential values to match.\n ties_okay (boolean or None): If True, then the expectation will still succeed if values outside the designated set are as 
common (but not more common) than designated values. Default False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * observed_value field in the result object is customized for this expectation to be a list representing the most common values in the column, which is often a single element... 
if there is a tie for most common among multiple values, observed_value will contain a single copy of each most common value\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMostCommonValueToBeInSet(\n column=\"test2\",\n value_set=[1, 2, 4],\n ties_okay=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMostCommonValueToBeInSet(\n column=\"test\",\n value_set=[1, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n 
},\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ]\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + 
"great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "value_set": { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "anyOf": [ + { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "oneOf": [ + { + "title": "Text", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "examples": [ + [ + "a", + "b", + "c", + "d", + "e" + ], + [ + "2024-01-01", + "2024-01-02", + "2024-01-03", + "2024-01-04", + "2024-01-05" + ] + ] + }, + { + "title": "Numbers", + "type": "array", + "items": { + "type": "number" + }, + "minItems": 1, + "examples": [ + [ + 1, + 2, + 3, + 4, + 5 + ], + [ + 1.1, + 2.2, + 3.3, + 4.4, + 5.5 + ], + [ + 1, + 2.2, + 3, + 4.4, + 5 + ] + ] + } + ] + }, + { + "type": "object" + } + ] + }, + "ties_okay": { + "title": "Ties Okay", + "description": "If True, then the expectation will still succeed if values outside the designated set are as common (but not more common) than designated values.", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnMostCommonValueToBeInSet" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_most_common_value_to_be_in_set" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + 
"manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the most common value to be within the designated value set." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "value_set" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + 
}, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + 
] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json new file mode 100644 index 000000000000..e102e81efb25 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json @@ -0,0 +1,483 @@ +{ + "title": "Expect column pair values A to be greater than B", + "description": "Expect the values in column A to be greater than column B.\n\nExpectColumnPairValuesAToBeGreaterThanB is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n 
column_A (str): The first column name.\n column_B (str): The second column name.\n or_equal (boolean or None): If True, then values can be equal, not strictly greater.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 2 1\n 1 2 2\n 2 4 4\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesAToBeGreaterThanB(\n column_A=\"test\",\n column_B=\"test2\",\n or_equal=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n 
\"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesAToBeGreaterThanB(\n column_A=\"test2\",\n column_B=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ],\n [\n 2,\n 2\n ],\n [\n 4,\n 4\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column_A": { + "title": "Column A", + "description": "The first column name.", + "minLength": 1, + "type": "string" + }, + "column_B": { + "title": "Column B", + "description": "The second column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "or_equal": { + "title": "Or Equal", + "description": "If True, then values can be equal, not strictly greater.", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "ignore_row_if": { + "title": "Ignore Row If", + "description": "If specified, sets the condition on which a given row is to be ignored.", + "default": "both_values_are_missing", + "anyOf": [ + { + "enum": [ + "both_values_are_missing", + "either_value_is_missing", + "neither" + ], + "type": "string" + }, + { + 
"type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnPairValuesAToBeGreaterThanB" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_pair_values_a_to_be_greater_than_b" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column_pair", + "description": "Column Pair Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column pair map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the values in column A to be greater than column B." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column_A", + "column_B" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + 
} + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json new file mode 100644 index 000000000000..7c271c3a2c54 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json @@ -0,0 +1,472 @@ +{ + "title": "Expect column pair values to be equal", + "description": "Expect the values in column A to be the same as column B.\n\nExpectColumnPairValuesToBeEqual is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be 
ignored. Default \"both_values_are_missing\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 2\n 1 2 2\n 2 4 4\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesToBeEqual(\n column_A=\"test\",\n column_B=\"test2\",\n mostly=0.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n 
\"partial_unexpected_list\": [\n [\n 1,\n 2\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesToBeEqual(\n column_A=\"test\",\n column_B=\"test2\",\n mostly=1.0\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column_A": { + "title": "Column A", + "description": "The first column name.", + "minLength": 1, + "type": "string" + }, + "column_B": { + "title": "Column B", + "description": "The second column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "ignore_row_if": { + "title": "Ignore Row If", + "description": "If specified, sets the condition on which a given row is to be ignored.", + "default": "both_values_are_missing", + "anyOf": [ + { + "enum": [ + "both_values_are_missing", + "either_value_is_missing", + "neither" + ], + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": 
"ExpectColumnPairValuesToBeEqual" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_pair_values_to_be_equal" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column_pair", + "description": "Column Pair Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column pair map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the values in column A to be the same as column B." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column_A", + "column_B" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + 
"positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": 
"#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json new file mode 100644 index 
000000000000..f152d27ee199 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json @@ -0,0 +1,491 @@ +{ + "title": "Expect column pair values to be in set", + "description": "Expect the paired values from columns A and B to belong to a set of valid pairs.\n\nExpectColumnPairValuesToBeInSet is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n value_pairs_set (list of tuples): All the valid pairs to be matched.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"both_values_are_missing\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification.
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesToBeInSet(\n column_A=\"test\",\n column_B=\"test2\",\n value_pairs_set=[(2,1), (1,1)],\n mostly=.5\n 
)\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 4,\n 1\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesToBeInSet(\n column_A=\"test\",\n column_B=\"test2\",\n value_pairs_set=[(1,2), (4,1)],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n [\n 1,\n 1\n ],\n [\n 2,\n 1\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": {
+ "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column_A": { + "title": "Column A", + "description": "The first column name.", + "minLength": 1, + "type": "string" + }, + "column_B": { + "title": "Column B", + "description": "The second column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "value_pairs_set": { + "title": "Value Pairs Set", + "anyOf": [ + { + "type": "array", + "items": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": [ + {}, + {} + ] + } + }, + { + "type": "object" + } + ] + }, + "ignore_row_if": { + "title": "Ignore Row If", + "default": "both_values_are_missing", + "anyOf": [ + { + "enum": [ + "both_values_are_missing", + "either_value_is_missing", + "neither" + ], + "type": "string" + 
}, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnPairValuesToBeInSet" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_pair_values_to_be_in_set" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column_pair", + "description": "Column Pair Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column pair map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the paired values from columns A and B to belong to a set of valid pairs." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Redshift" + ] + } + } + } + }, + "required": [ + "column_A", + "column_B", + "value_pairs_set" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": 
"string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + 
"title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnProportionOfNonNullValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnProportionOfNonNullValuesToBeBetween.json new file mode 100644 index 000000000000..4f963aa201ce --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnProportionOfNonNullValuesToBeBetween.json @@ -0,0 +1,484 @@ +{ + "title": "Expect column proportion of non-null values to be between", + "description": "Expect the proportion of non-null values to be between a minimum value and a maximum value.\n\nFor example, in a column containing [1, 2, None, 3, None, None, 4, 4, 4, 4], there are 7 non-null values and 10 total values for a proportion of 0.7.\n\nExpectColumnProportionOfNonNullValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers 
that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum proportion of non-null values (proportions are on the range 0 to 1).\n max_value (float or None): The maximum proportion of non-null values (proportions are on the range 0 to 1).\n strict_min (boolean): If True, the minimum proportion of non-null values must be strictly larger than min_value. default=False\n strict_max (boolean): If True, the maximum proportion of non-null values must be strictly smaller than max_value. default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the proportion of non-null values in the column\n\nSee Also:\n [ExpectColumnProportionOfUniqueValuesToBeBetween](https://greatexpectations.io/expectations/expect_column_proportion_of_unique_values_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks 
(SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Completeness\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 \"abb\" None\n 2 \"acc\" 1\n 3 None 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnProportionOfNonNullValuesToBeBetween(\n column=\"test\",\n min_value=0,\n max_value=0.8\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.75\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnProportionOfNonNullValuesToBeBetween(\n column=\"test2\",\n min_value=0.3,\n max_value=0.5,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.75\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum proportion of non-null values (proportions are on the range 0 to 1).", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum proportion of non-null values (proportions are on the range 0 to 1).", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the minimum proportion of non-null values must be strictly larger than min_value.", + "default": false, + "type": "boolean" + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the maximum proportion of non-null values must be strictly smaller than max_value.", + "default": false, + "type": "boolean" + }, + "library_metadata": { + "title": "Library Metadata", + "default": { + "maturity": 
"production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + }, + "type": "object" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnProportionOfNonNullValuesToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_proportion_of_non_null_values_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Completeness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the proportion of non-null values to be between a minimum value and a maximum value." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json new file mode 100644 index 000000000000..bc0f82cee5e3 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json @@ -0,0 +1,498 @@ +{ + "title": "Expect column proportion of unique values to be between", + "description": "Expect the proportion of unique values to be between a minimum value and a maximum value.\n\nFor example, in a column containing [1, 2, 2, 3, 3, 3, 4, 4, 4, 4], there are 4 unique values and 10 total values for a proportion of 0.4.\n\nExpectColumnProportionOfUniqueValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n 
column (str): The column name.\n min_value (float or None): The minimum proportion of unique values (Proportions are on the range 0 to 1).\n max_value (float or None): The maximum proportion of unique values (Proportions are on the range 0 to 1).\n strict_min (boolean): If True, the minimum proportion of unique values must be strictly larger than min_value. default=False\n strict_max (boolean): If True, the maximum proportion of unique values must be strictly smaller than max_value. default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the proportion of unique values in the column\n\nSee Also:\n [ExpectColumnUniqueValueCountToBeBetween](https://greatexpectations.io/expectations/expect_column_unique_value_count_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks 
(SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 \"abb\" 1\n 2 \"acc\" 1\n 3 \"aaa\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnProportionOfUniqueValuesToBeBetween(\n column=\"test\",\n min_value=0,\n max_value=0.8\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .75\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnProportionOfUniqueValuesToBeBetween(\n column=\"test2\",\n min_value=0.3,\n max_value=0.5,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .5\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum proportion of unique values (Proportions are on the range 0 to 1).", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum proportion of unique values (Proportions are on the range 0 to 1).", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the minimum proportion of unique values must be strictly larger than min_value.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the maximum proportion of unique values must be strictly smaller than max_value.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } 
+ ] + }, + "library_metadata": { + "title": "Library Metadata", + "default": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + }, + "type": "object" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnProportionOfUniqueValuesToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_proportion_of_unique_values_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Uniqueness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the proportion of unique values to be between a minimum value and a maximum value." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json new file mode 100644 index 000000000000..50587c0c6814 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json @@ -0,0 +1,493 @@ +{ + "title": "Expect column quantile values to be between", + "description": "Expect the specific provided column quantiles to be between a minimum value and a maximum value.\n\nExpectColumnQuantileValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nExpectColumnQuantileValuesToBeBetween can be computationally intensive for large datasets.\n\nArgs:\n column (str): The column name.\n quantile_ranges (dictionary with keys 'quantiles' and 
'value_ranges'): Key 'quantiles' is an increasingly ordered list of desired quantile values (floats). Key 'value_ranges' is a list of 2-value lists that specify a lower and upper bound (inclusive) for the corresponding quantile (with [min, max] ordering). The length of the 'quantiles' list and the 'value_ranges' list must be equal.\n allow_relative_error (boolean or string): Whether to allow relative error in quantile communications on backends that support or require it.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound only\n * If max_value is None, then min_value is treated as a lower bound only\n * details.success_details field in the result object is customized for this expectation\n\nSee Also:\n [ExpectColumnMinToBeBetween](https://greatexpectations.io/expectations/expect_column_min_to_be_between)\n [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between)\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test\n 0 1 1\n 1 2 7\n 2 2 2.5\n 3 3 3\n 4 3 2\n 5 3 5\n 6 4 6\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnQuantileValuesToBeBetween(\n column=\"test\",\n quantile_ranges={\n \"quantiles\": [0, .333, .667, 1],\n \"value_ranges\": [[0,1], [2,3], [3,4], [4,5]]\n }\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": {\n \"quantiles\": [\n 0,\n 0.333,\n 0.6667,\n 1\n ],\n \"values\": [\n 1,\n 2,\n 3,\n 4\n ]\n },\n \"details\": {\n \"success_details\": [\n true,\n true,\n true,\n true\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnQuantileValuesToBeBetween(\n column=\"test2\",\n quantile_ranges={\n \"quantiles\": [0, .333, .667, 1],\n \"value_ranges\": [[0,1], [2,3], [3,4], [4,5]]\n }\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": {\n \"quantiles\": [\n 0,\n 0.333,\n 0.6667,\n 1\n ],\n \"values\": [\n 1.0,\n 2.5,\n 5.0,\n 7.0\n ]\n },\n \"details\": {\n \"success_details\": [\n true,\n true,\n false,\n false\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + 
"type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "quantile_ranges": { + "title": "Quantile Ranges", + "description": "Key 'quantiles' is an increasingly ordered list of desired quantile values (floats). 
Key 'value_ranges' is a list of 2-value lists that specify a lower and upper bound (inclusive) for the corresponding quantile (with [min, max] ordering).", + "anyOf": [ + { + "$ref": "#/definitions/QuantileRange" + }, + { + "type": "object" + } + ] + }, + "allow_relative_error": { + "title": "Allow Relative Error", + "description": "Whether to allow relative error in quantile communications on backends that support or require it.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnQuantileValuesToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_quantile_values_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the specific provided column quantiles to be between a minimum value and a maximum value." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "quantile_ranges" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + }, + "QuantileRange": { + "title": "QuantileRange", + "type": "object", + "properties": { + "quantiles": { + "title": "Quantiles", + "type": "array", + "items": { + "type": "number" + } + }, + "value_ranges": { + "title": "Value Ranges", + "type": "array", + "items": { + "type": "array", + "items": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "integer" + } + ] + } + } + } + }, + "required": [ + "quantiles", + "value_ranges" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json new file mode 100644 index 000000000000..85b84e711f2f --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json @@ -0,0 +1,481 @@ +{ + "title": "Expect column standard deviation to be between", + "description": "Expect the column standard deviation to be between a minimum value and a maximum value.\n\nUses sample standard deviation (normalized by N-1).\n\nExpectColumnStdevToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common 
types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum value for the column standard deviation.\n max_value (float or None): The maximum value for the column standard deviation.\n strict_min (boolean): If True, the column standard deviation must be strictly larger than min_value. default=False.\n strict_max (boolean): If True, the column standard deviation must be strictly smaller than max_value. default=False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the true standard deviation for the column\n\nSee Also:\n [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between)\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnStdevToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=.6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.5251983752196243\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnStdevToBeBetween(\n column=\"test2\",\n min_value=.5,\n max_value=.6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.5617376914898995\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum value for the column standard deviation.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum value for the column standard deviation.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the column standard deviation must be strictly larger than min_value.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the column standard deviation must be strictly smaller than max_value.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + 
"expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnStdevToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_stdev_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column standard deviation to be between a minimum value and a maximum value." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, 
+ "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + 
"enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json 
b/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json new file mode 100644 index 000000000000..08f79476b9b6 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json @@ -0,0 +1,497 @@ +{ + "title": "Expect column sum to be between", + "description": "Expect the column sum to be between a minimum value and a maximum value.\n\nExpectColumnSumToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimal sum allowed.\n max_value (comparable type or None): The maximal sum allowed.\n strict_min (boolean): If True, the minimal sum must be strictly larger than min_value. default=False.\n strict_max (boolean): If True, the maximal sum must be strictly smaller than max_value. default=False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. 
Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column sum\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnSumToBeBetween(\n column=\"test\",\n min_value=2,\n max_value=6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 5.1\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnSumToBeBetween(\n column=\"test2\",\n min_value=2,\n max_value=6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 13.5\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimal sum allowed.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximal sum allowed.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the minimal sum must be strictly larger than min_value.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the maximal sum must be strictly smaller than max_value.", + "default": false, + "anyOf": [ + { + 
"type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnSumToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_sum_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column sum to be between a minimum value and a maximum value." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnToExist.json b/great_expectations/expectations/core/schemas/ExpectColumnToExist.json new file mode 100644 index 000000000000..d156433aedf6 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnToExist.json @@ -0,0 +1,248 @@ +{ + "title": "Expect column to exist", + "description": "Checks for the existence of a specified column within a table.\n\nExpectColumnToExist is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation. They are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n column_index (int or None, optional): If not None, checks the order of the columns. The expectation will fail if the column is not in location column_index (zero-indexed).\n result_format (str or None, optional): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None, optional): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None, optional): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nPassing Case:\n Input:\n ExpectColumnToExist(\n column=\"test\",\n column_index=0\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": true,\n \"result\": {}\n }\n\nFailing Case:\n Input:\n ExpectColumnToExist(\n column=\"missing_column\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": false,\n \"result\": {}\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "column_index": { + "title": "Column Index", + "description": "If not None, checks the order of the columns. The expectation will fail if the column is not in location column_index (zero-indexed).", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnToExist" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_to_exist" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Schema" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "table expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Checks for the existence of a specified column within a table." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json new file 
mode 100644 index 000000000000..0a718a9c1ada --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json @@ -0,0 +1,498 @@ +{ + "title": "Expect column unique value count to be between", + "description": "Expect the number of unique values to be between a minimum value and a maximum value.\n\nExpectColumnUniqueValueCountToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum number of unique values allowed.\n max_value (int or None): The maximum number of unique values allowed.\n strict_min (bool): If True, the column must have strictly more unique value count than min_value to pass.\n strict_max (bool): If True, the column must have strictly fewer unique value count than max_value to pass.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. 
Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be an int representing the number of unique values the column\n\nSee Also:\n [ExpectColumnProportionOfUniqueValuesToBeBetween](https://greatexpectations.io/expectations/expect_column_proportion_of_unique_values_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 \"abb\" 1\n 2 \"acc\" 1\n 3 \"aaa\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnUniqueValueCountToBeBetween(\n column=\"test\",\n min_value=2,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnUniqueValueCountToBeBetween(\n column=\"test2\",\n min_value=3,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum number of unique values allowed.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "object" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum number of unique values allowed.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "object" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the column must have strictly more unique value count than min_value to pass.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the column must have strictly fewer unique value count than max_value to pass.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "library_metadata": { + "title": "Library Metadata", + 
"default": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + }, + "type": "object" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnUniqueValueCountToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_unique_value_count_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Aggregate" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Uniqueness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column aggregate expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the number of unique values to be between a minimum value and a maximum value." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json new file mode 100644 index 000000000000..3499a1d3ed50 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json @@ -0,0 +1,497 @@ +{ + "title": "Expect column value lengths to be between", + "description": "Expect the column entries to be strings with length between a minimum value and a maximum value (inclusive).\n\nThis expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError.\n\nExpectColumnValueLengthsToBeBetween is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum value for a column entry length.\n max_value (int or None): The maximum value for a column entry length.\n strict_min (boolean): If True, values must be strictly larger than min_value. Default=False\n strict_max (boolean): If True, values must be strictly smaller than max_value. Default=False\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectColumnValueLengthsToEqual](https://greatexpectations.io/expectations/expect_column_value_lengths_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test 
test2\n 0 \"12345\" \"A\"\n 1 \"abcde\" \"13579\"\n 2 \"1b3d5\" \"24680\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueLengthsToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueLengthsToBeBetween(\n column=\"test\",\n min_value=5,\n max_value=5,\n strict_min=True,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + 
"rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum value for a column entry length.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "object" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum value for a column entry length.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "object" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, values must 
be strictly larger than min_value.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, values must be strictly smaller than max_value.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValueLengthsToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_value_lengths_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings with length between a minimum value and a maximum value (inclusive)." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json new file mode 100644 index 000000000000..caa81ad27e0e --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json @@ -0,0 +1,460 @@ +{ + "title": "Expect column value lengths to equal", + "description": "Expect the column entries to be strings with length equal to the provided value.\n\nThis expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError.\n\nExpectColumnValueLengthsToEqual is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value (int): The expected value for a column entry length.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"12345\" \"A\"\n 1 \"abcde\" \"13579\"\n 2 \"1b3d5\" \"24680\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueLengthsToEqual(\n column=\"test\",\n value=5\n )\n\n Output:\n {\n 
\"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueLengthsToEqual(\n column=\"test2\",\n value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"A\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "value": { + "title": "Value", + "description": "The expected value for a column entry length.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValueLengthsToEqual" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_value_lengths_to_equal" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": 
"Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings with length equal to the provided value." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "value" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + 
"parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + 
"description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json b/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json new file mode 100644 index 000000000000..c6180fc27842 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json @@ -0,0 +1,473 @@ +{ + "title": "Expect column value z-scores to be less than", + "description": "Expect the Z-scores of a column's values to be less than a given 
threshold.\n\nExpectColumnValueZScoresToBeLessThan is a Column Map Expectation for typed-column backends, and also for PandasExecutionEngine where the column dtype and provided type_ are unambiguous constraints (any dtype except 'object' or dtype of 'object' with type_ specified as 'object').\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n threshold (number): A maximum Z-score threshold. All column Z-scores that are lower than this threshold will evaluate successfully.\n double_sided (boolean): A True or False value indicating whether to evaluate double sidedly. Examples: (double_sided = True, threshold = 2) -> Z scores in non-inclusive interval(-2,2) | (double_sided = False, threshold = 2) -> Z scores in non-inclusive interval (-infinity,2)\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 -100000000000\n 1 1 -1\n 2 1 0\n 3 3 1\n 4 3 1\n\nCode Examples:\n Passing Case:\n Input:\n 
ExpectColumnValueZScoresToBeLessThan(\n column=\"test\",\n threshold=1.96,\n double_sided=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 5,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueZScoresToBeLessThan(\n column=\"test2\",\n threshold=1,\n double_sided=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 5,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 20.0,\n \"partial_unexpected_list\": [\n -100000000000\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 20.0,\n \"unexpected_percent_nonmissing\": 20.0\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact 
of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "threshold": { + "title": "Threshold", + "description": "A maximum Z-score threshold. All column Z-scores that are lower than this threshold will evaluate successfully.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "double_sided": { + "title": "Double Sided", + "description": "A True or False value indicating whether to evaluate double sidedly. 
Examples: (double_sided = True, threshold = 2) -> Z scores in non-inclusive interval(-2,2) | (double_sided = False, threshold = 2) -> Z scores in non-inclusive interval (-infinity,2)", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValueZScoresToBeLessThan" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_value_z_scores_to_be_less_than" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the Z-scores of a column's values to be less than a given threshold." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "threshold", + "double_sided" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": 
"Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + 
"properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json new file mode 100644 index 000000000000..015525fbb160 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json @@ -0,0 +1,513 @@ +{ + "title": "Expect column values to be between", + "description": "Expect the column entries to be between a minimum value and a maximum value (inclusive).\n\nExpectColumnValuesToBeBetween is a Column Map Expectation\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimum value for a column entry.\n max_value (comparable type or None): The maximum value for a column entry.\n strict_min (boolean): If True, values must be strictly larger than min_value. Default=False.\n strict_max (boolean): If True, values must be strictly smaller than max_value. Default=False.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound, and there is no minimum value checked.\n * If max_value is None, then min_value is treated as a lower bound, and there is no maximum value checked.\n\nSee Also:\n [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n 
Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 4,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=7,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 4,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 25.0,\n \"partial_unexpected_list\": [\n 7.0\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 25.0,\n \"unexpected_percent_nonmissing\": 25.0\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered 
Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum value for a column entry.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum value for a column entry.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, 
+ { + "type": "string", + "format": "date-time" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, values must be strictly larger than min_value.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "The maximum value for a column entry.", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be between a minimum value and a maximum value (inclusive)." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json new file mode 100644 index 000000000000..caca615d9217 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json @@ -0,0 +1,519 @@ +{ + "title": "Expect column values to be in set", + "description": "Expect each column value to be in a given set.\n\nExpectColumnValuesToBeInSet is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToNotBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_not_be_in_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeInSet(\n column=\"test\",\n value_set=[1, 2],\n mostly=.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n 
\"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n 4\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeInSet(\n column=\"test2\",\n value_set=[2, 4],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n 1,\n 1,\n 1\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "value_set": { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "anyOf": [ + { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "oneOf": [ + { + "title": "Text", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "examples": [ + [ + "a", + "b", + "c", + "d", + "e" + ], + [ + "2024-01-01", + "2024-01-02", + "2024-01-03", + "2024-01-04", + "2024-01-05" + ] + ] + }, + { + "title": "Numbers", + "type": "array", + "items": { + "type": "number" + }, + "minItems": 1, + "examples": [ + [ + 1, + 2, + 3, + 4, + 5 + ], + [ + 1.1, + 2.2, 
+ 3.3, + 4.4, + 5.5 + ], + [ + 1, + 2.2, + 3, + 4.4, + 5 + ] + ] + } + ] + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToBeInSet" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_be_in_set" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect each column value to be in a given set." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "value_set" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": 
"string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + 
"title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json new file mode 100644 index 000000000000..274ef4729965 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json @@ -0,0 +1,458 @@ +{ + "title": "Expect column values to be in type list", + "description": "Expect a column to contain values from a specified type list.\n\nExpectColumnValuesToBeInTypeList is a Column Map Expectation for typed-column backends, and also for Pandas Datasources where the column dtype provides an unambiguous constraints (any dtype except 'object').\n\nFor Pandas columns with dtype of 'object' ExpectColumnValuesToBeInTypeList will independently check each row's type.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n type_list (list[str] or None): \nA list of strings representing the data type that each column should have as entries. Valid types are defined by the current backend implementation and are dynamically loaded.\n\n For example, valid types for Pandas Datasources include any numpy dtype values (such as 'int64') or native python types (such as 'int'), whereas valid types for a SqlAlchemy Datasource include types named by the current driver such as 'INTEGER' in most SQL dialects and 'TEXT' in dialects such as postgresql. Valid types for Spark Datasources include 'StringType', 'BooleanType' and other pyspark-defined type names.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee also:\n [ExpectColumnValuesToBeOfType](https://greatexpectations.io/expectations/expect_column_values_to_be_of_type)\n\nSupported Data Sources:\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 \"12345\" 1\n 1 \"abcde\" 2\n 2 \"1b3d5\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeInTypeList(\n column=\"test2\",\n type_list=[\"NUMBER\", \"STRING\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing 
Case:\n Input:\n ExpectColumnValuesToBeInTypeList(\n column=\"test\",\n type_list=[\"NUMBER\", \"DOUBLE\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "type_list": { + "title": "Type List", + "description": "\n A list of strings representing the data type that each column should have as entries. 
Valid types are defined by the current backend implementation and are dynamically loaded.\n ", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToBeInTypeList" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_be_in_type_list" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Schema" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect a column to contain values from a specified type list." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "SQL Server", + "BigQuery", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + 
"title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": 
"string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json new file mode 100644 index 000000000000..59de3fdfbe60 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json @@ -0,0 +1,447 @@ +{ + "title": "Expect column values to be null", + "description": "Expect the column values to be null.\n\nExpectColumnValuesToBeNull is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToNotBeNull](https://greatexpectations.io/expectations/expect_column_values_to_not_be_null)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Completeness\n\nExample Data:\n test test2\n 0 NaN \"A\"\n 1 True NaN\n 2 False NaN\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeNull(\n column=\"test2\",\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"A\"\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeNull(\n column=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n true,\n false\n ]\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": 
"Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToBeNull" + }, + "expectation_type": { + "title": "Expectation Type", + "type": 
"string", + "const": "expect_column_values_to_be_null" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Completeness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column values to be null." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across 
<`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": 
"Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json new file mode 100644 index 000000000000..3a0088c88ee1 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json @@ -0,0 +1,460 @@ +{ + "title": "Expect column values to be of type", + 
"description": "Expect a column to contain values of a specified data type.\n\nExpectColumnValuesToBeOfType is a Column Map Expectation for typed-column backends, and also for Pandas Datasources where the column dtype and provided type_ are unambiguous constraints (any dtype except 'object' or dtype of 'object' with type_ specified as 'object').\n\nFor Pandas columns with dtype of 'object' ExpectColumnValuesToBeOfType will\nindependently check each row's type.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n type\\_ (str): \nA string representing the data type that each column should have as entries. Valid types are defined by the current backend implementation and are dynamically loaded.\n\n For example, valid types for Pandas Datasources include any numpy dtype values (such as 'int64') or native python types (such as 'int'), whereas valid types for a SqlAlchemy Datasource include types named by the current driver such as 'INTEGER' in most SQL dialects and 'TEXT' in dialects such as postgresql. Valid types for Spark Datasources include 'StringType', 'BooleanType' and other pyspark-defined type names. Note that the strings representing these types are sometimes case-sensitive. For instance, with a Pandas backend `timestamp` will be unrecognized and fail the expectation, while `Timestamp` would pass with valid data.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee also:\n [ExpectColumnValuesToBeInTypeList](https://greatexpectations.io/expectations/expect_column_values_to_be_in_type_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 \"12345\" 1\n 1 \"abcde\" 2\n 2 \"1b3d5\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeOfType(\n column=\"test2\",\n type_=\"NUMBER\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeOfType(\n column=\"test\",\n type_=\"DOUBLE\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + 
"type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": 
[ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "type_": { + "title": "Type ", + "description": "\n A string representing the data type that each column should have as entries. Valid types are defined by the current backend implementation and are dynamically loaded.\n ", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToBeOfType" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_be_of_type" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Schema" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect a column to contain values of a specified data type." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "type_" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": 
"string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + 
"title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json new file mode 100644 index 000000000000..79eef36bd3d7 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json @@ -0,0 +1,447 @@ +{ + "title": "Expect column values to be unique", + "description": "Expect each column value to be unique.\n\nThis expectation detects duplicates. All duplicated values are counted as exceptions.\n\nFor example, [1, 2, 3, 3, 3] will return [3, 3, 3] in result.exceptions_list, with unexpected_percent = 60.0.\n\nExpectColumnValuesToBeUnique is a Column Map Expectation\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 \"A\"\n 1 2 \"A\"\n 2 3 \"B\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeUnique(\n column=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n 
\"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeUnique(\n column=\"test2\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"A\",\n \"A\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": true\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToBeUnique" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_be_unique" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Uniqueness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": 
"production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect each column value to be unique." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + 
"type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + 
"conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json new file mode 100644 index 000000000000..64c8999ae0c6 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json @@ -0,0 +1,458 @@ +{ + "title": "Expect column values to match like pattern", + "description": "Expect the column entries to be strings that match a given like pattern expression.\n\nExpectColumnValuesToMatchLikePattern is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the 
result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern (str): The SQL like pattern expression the column entries should match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL 
Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"bee\"\n 2 \"acc\" \"24601\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchLikePattern(\n column=\"test\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchLikePattern(\n column=\"test2\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"bee\",\n \"24601\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": 
"#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "like_pattern": { + "title": "Like Pattern", + "description": "The SQL like pattern expression the column entries should match.", + "anyOf": [ + { 
+ "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToMatchLikePattern" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_match_like_pattern" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings that match a given like pattern expression." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "Redshift", + "MySQL", + "SQL Server", + "Databricks (SQL)", + "BigQuery", + "Snowflake" + ] + } + } + } + }, + "required": [ + "column", + "like_pattern" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + 
"required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + 
"default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json new file mode 100644 index 000000000000..0b9973fc30fe --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json @@ -0,0 +1,478 @@ +{ + "title": "Expect column values to match like pattern list", + "description": "Expect the column entries to be strings that match any of a provided list of like pattern expressions.\n\nExpectColumnValuesToMatchLikePatternList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern_list (List[str]): The list of SQL like pattern expressions the column entries should match.\n match_on (string): 'any' or 'all'. Use 'any' if the value should match at least one like pattern in the list. Use 'all' if it should match each like pattern in the list.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL 
Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"adb\"\n 2 \"acc\" \"aaa\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchLikePatternList(\n column=\"test\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n match_on=\"any\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchLikePatternList(\n column=\"test2\",\n like_pattern_list=[\"[ad]%\", \"[a]%\"],\n match_on=\"all\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"aaa\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": 
"array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": 
"string" + }, + "like_pattern_list": { + "title": "Like Pattern List", + "description": "The list of SQL like pattern expressions the column entries should match.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "object" + } + ] + }, + "match_on": { + "title": "Match On", + "description": "'any' or 'all'. Use 'any' if the value should match at least one like pattern in the list. Use 'all' if it should match each like pattern in the list.", + "default": "any", + "anyOf": [ + { + "enum": [ + "any", + "all" + ], + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToMatchLikePatternList" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_match_like_pattern_list" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings that match any of a provided list of like pattern expressions." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "Redshift", + "MySQL", + "SQL Server", + "Databricks (SQL)", + "BigQuery", + "Snowflake" + ] + } + } + } + }, + "required": [ + "column", + "like_pattern_list" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + 
}, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json new file mode 100644 index 000000000000..fdfaf5758818 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json @@ -0,0 +1,458 @@ +{ + "title": "Expect column values to match regex", + "description": "Expect the column entries to be strings that match a given regular expression.\n\nValid matches can be found anywhere in the string, for example \"[at]+\" will identify the following strings as expected: \"cat\", \"hat\", \"aa\", \"a\", and \"t\", and the following strings as unexpected: \"fish\", \"dog\".\n\nExpectColumnValuesToMatchRegex is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex (str): The regular expression the column entries should match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchRegex(\n column=\"test\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchRegex(\n column=\"test2\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"bcc\",\n \"bdd\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + 
"default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "regex": { + "title": "Regex", + "description": "The regular expression the column entries should 
match.", + "default": "(?s).*", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToMatchRegex" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_match_regex" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings that match a given regular expression." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "BigQuery", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] 
+ }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ 
+ "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json new file mode 100644 index 000000000000..5a23d4e31b15 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json @@ -0,0 +1,478 @@ +{ + "title": "Expect column values to match regex list", + "description": "Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions.\n\nMatches can be anywhere in the string.\n\nExpectColumnValuesToMatchRegexList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex_list (list): The list of regular expressions which the column entries should match.\n match_on (string): 'any' or 'all'. 
Use 'any' if the value should match at least one regular expression in the list. Use 'all' if it should match each regular expression in the list.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchRegexList(\n column=\"test2\",\n regex_list=[\"^a.*\", \"^b.*\"],\n match_on=\"any\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchRegexList(\n column=\"test\",\n regex_list=[\"^a.*\", \"^b.*\"],\n match_on=\"all\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"bcc\",\n \"bdd\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + 
"result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "regex_list": { + "title": "Regex List", + 
"description": "The list of regular expressions which the column entries should match.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "object" + } + ] + }, + "match_on": { + "title": "Match On", + "description": "'any' or 'all'. Use 'any' if the value should match at least one regular expression in the list. Use 'all' if it should match each regular expression in the list.", + "default": "any", + "anyOf": [ + { + "enum": [ + "any", + "all" + ], + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToMatchRegexList" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_match_regex_list" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "BigQuery", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "regex_list" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + 
"required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + 
"default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json new file mode 100644 index 000000000000..c018c345a1fe --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json @@ -0,0 +1,519 @@ +{ + "title": "Expect column values to not be in set", + "description": "Expect column entries to not be in the set.\n\nExpectColumnValuesToNotBeInSet is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_be_in_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotBeInSet(\n column=\"test2\",\n value_set=[2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": 
false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotBeInSet(\n column=\"test\",\n value_set=[2, 4],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n 2,\n 4\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "value_set": { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "anyOf": [ + { + "title": "Value Set", + "description": "A set of objects used for comparison.", + "oneOf": [ + { + "title": "Text", + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "examples": [ + [ + "a", + "b", + "c", + "d", + "e" + ], + [ + "2024-01-01", + "2024-01-02", + "2024-01-03", + "2024-01-04", + "2024-01-05" + ] + ] + }, + { + "title": "Numbers", + "type": "array", + "items": { + "type": "number" + }, + "minItems": 1, + "examples": [ + [ + 1, + 2, + 3, + 4, + 5 + ], + [ + 1.1, + 2.2, 
+ 3.3, + 4.4, + 5.5 + ], + [ + 1, + 2.2, + 3, + 4.4, + 5 + ] + ] + } + ] + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToNotBeInSet" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_not_be_in_set" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect column entries to not be in the set." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "value_set" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": 
"string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + 
"title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json new file mode 100644 index 000000000000..583a1617bac6 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json @@ -0,0 +1,447 @@ +{ + "title": "Expect column values to not be null", + "description": "Expect the column values to not be null.\n\nTo be counted as an exception, values must be explicitly null or missing, such as a NULL in PostgreSQL or an\nnp.NaN in pandas. Empty strings don't count as null unless they have been coerced to a null type.\n\nExpectColumnValuesToNotBeNull is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToBeNull](https://greatexpectations.io/expectations/expect_column_values_to_be_null)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Completeness\n\nExample Data:\n test test2\n 0 NaN \"A\"\n 1 True NaN\n 2 False NaN\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotBeNull(\n column=\"test\",\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n 
\"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n null\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotBeNull(\n column=\"test2\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n null,\n null\n ]\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToNotBeNull" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_not_be_null" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Completeness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + 
"maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column values to not be null." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + 
"default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" 
+ }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json new file mode 100644 index 000000000000..8dc43d0ff0fe --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json @@ -0,0 +1,457 @@ +{ + "title": "Expect column values to not match like pattern", + "description": "Expect the column entries to be strings that do NOT match a given like pattern expression.\n\nExpectColumnValuesToNotMatchLikePattern is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row 
in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern (str): The SQL like pattern expression the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL 
Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"bee\"\n 2 \"acc\" \"24601\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePattern(\n column=\"test2\",\n like_pattern=\"[a]%\",\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"ade\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePattern(\n column=\"test\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": 
"BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "like_pattern": { + "title": "Like Pattern", + "description": "The SQL like pattern expression the column 
entries should NOT match.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToNotMatchLikePattern" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_not_match_like_pattern" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings that do NOT match a given like pattern expression." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "Redshift", + "MySQL", + "SQL Server", + "Databricks (SQL)", + "Snowflake" + ] + } + } + } + }, + "required": [ + "column", + "like_pattern" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ 
+ "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": 
"or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json new file mode 100644 index 000000000000..5d18e902d412 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json @@ -0,0 +1,460 @@ +{ + "title": "Expect column values to not match like pattern list", + "description": "Expect the column entries to be strings that do NOT match any of a provided list of like pattern expressions.\n\nExpectColumnValuesToNotMatchLikePatternList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern_list (List[str]): The list of SQL like pattern expressions the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL 
Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"adb\"\n 2 \"acc\" \"aaa\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePatternList(\n column=\"test2\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"aaa\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePatternList(\n column=\"test\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, 
+ "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "like_pattern_list": { + "title": "Like Pattern 
List", + "description": "The list of SQL like pattern expressions the column entries should NOT match.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToNotMatchLikePatternList" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_not_match_like_pattern_list" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings that do NOT match any of a provided list of like pattern expressions." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "Redshift", + "MySQL", + "SQL Server", + "Databricks (SQL)", + "Snowflake" + ] + } + } + } + }, + "required": [ + "column", + "like_pattern_list" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + 
"required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + 
"default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json new file mode 100644 index 000000000000..36e0460a1882 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json @@ -0,0 +1,457 @@ +{ + "title": "Expect column values to not match regex", + "description": "Expect the column entries to be strings that do NOT match a given regular expression.\n\nThe regex must not match any portion of the provided string. For example, \"[at]+\" would identify the following strings as expected: \"fish\", \"dog\", and the following as unexpected: \"cat\", \"hat\".\n\nExpectColumnValuesToNotMatchRegex is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex (str): The regular expression the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchRegex(\n column=\"test2\",\n regex=\"^a.*\",\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"abc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchRegex(\n column=\"test\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + 
"description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "regex": { + "title": "Regex", + "description": "The regular expression the column entries should NOT match.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": 
"object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToNotMatchRegex" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_not_match_regex" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings that do NOT match a given regular expression." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column", + "regex" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + 
}, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + 
"or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json new file mode 100644 index 000000000000..88fd01070bf9 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json @@ -0,0 +1,460 @@ +{ + "title": "Expect column values to not match regex list", + "description": "Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string.\n\nExpectColumnValuesToNotMatchRegexList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex_list (list): The list of regular expressions which the column entries should not match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchRegexList(\n column=\"test\",\n regex_list=[\"^b.*\", \"^c.*\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchRegexList(\n column=\"test2\",\n regex_list=[\"^b.*\", \"^c.*\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"abc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { 
+ "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column": { + "title": "Column", + "description": "The column name.", + "minLength": 1, + "type": "string" + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "regex_list": { + "title": "Regex List", + "description": "The list of regular expressions which the column entries should not match.", + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "object" 
+ } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectColumnValuesToNotMatchRegexList" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_column_values_to_not_match_regex_list" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "column", + "description": "Column Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Validity" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "column map expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "Redshift", + "MySQL", + "Databricks (SQL)", + "SQLite" + ] + } + } + } + }, + "required": [ + "column", + "regex_list" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + 
] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": 
[ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json b/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json new file mode 100644 index 000000000000..fe662a781351 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json @@ -0,0 +1,459 @@ +{ + "title": "Expect compound columns to be unique", + "description": "Expect the compound columns to be unique.\n\nExpectCompoundColumnsToBeUnique is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): Set of columns to be checked.\n\nOther Parameters:\n ignore_row_if (str): \"all_values_are_missing\", \"any_value_is_missing\", \"never\" If specified, sets the condition on which a given row is to be ignored. 
Default \"never\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2 test3 test4\n 0 1 1 4 1\n 1 2 1 7 1\n 2 4 1 -3 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectCompoundColumnsToBeUnique(\n column_list=[\"test\", \"test2\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n 
\"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectCompoundColumnsToBeUnique(\n column_list=[\"test2\", \"test4\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n {\n \"test2\": 1,\n \"test4\": 1\n },\n {\n \"test2\": 1,\n \"test4\": 1\n },\n {\n \"test2\": 1,\n \"test4\": 1\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column_list": { + "title": "Column List", + "description": "Set of columns to be checked.", + "type": "array", + "items": { + "type": "string" + } + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "ignore_row_if": { + "title": "Ignore Row If", + "default": "all_values_are_missing", + "enum": [ + "all_values_are_missing", + "any_value_is_missing", + "never" + ], + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectCompoundColumnsToBeUnique" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_compound_columns_to_be_unique" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "multicolumn", + 
"description": "Multicolumn Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Uniqueness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "multi-column expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the compound columns to be unique." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column_list" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + 
"type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": 
"AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json b/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json new file mode 100644 index 000000000000..bf0a9c1dae29 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json @@ -0,0 +1,480 @@ +{ + "title": "Expect multicolumn sum to equal", + "description": "Expect that the sum of row values in a specified column list is the same for each row, and equal to a specified sum 
total.\n\nExpectMulticolumnSumToEqual is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): Set of columns to be checked.\n sum_total (int or float): Expected sum of columns\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2 test3\n 0 1 2 4\n 1 2 -2 7\n 2 4 4 -3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectMulticolumnSumToEqual(\n column_list=[\"test\", \"test2\", \"test3\"],\n sum_total=7,\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n 
\"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 4,\n \"test2\": 4,\n \"test3\": -3\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectMulticolumnSumToEqual(\n column_list=[\"test\", \"test2\", \"test3\"],\n sum_total=7\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 4,\n \"test2\": 4,\n \"test3\": -3\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column_list": { + "title": "Column List", + "description": "Set of columns to be checked.", + "type": "array", + "items": { + "type": "string" + } + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "ignore_row_if": { + "title": "Ignore Row If", + "description": "If specified, sets the condition on which a given row is to be ignored.", + "default": "all_values_are_missing", + "anyOf": [ + { + "enum": [ + "all_values_are_missing", + "any_value_is_missing", + "never" + ], + "type": "string" + }, + { + "type": "object" + } + ] + }, + "sum_total": { + "title": "Sum Total", + "description": "Expected sum of columns", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation 
Class", + "type": "string", + "const": "ExpectMulticolumnSumToEqual" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_multicolumn_sum_to_equal" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "multicolumn", + "description": "Multicolumn Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Numeric" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "multi-column expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect that the sum of row values in a specified column list is the same for each row, and equal to a specified sum total." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column_list", + "sum_total" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" 
+ }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": 
"nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectQueryResultsToMatchComparison.json 
b/great_expectations/expectations/core/schemas/ExpectQueryResultsToMatchComparison.json new file mode 100644 index 000000000000..19d92630d381 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectQueryResultsToMatchComparison.json @@ -0,0 +1,262 @@ +{ + "title": "Expect query results to match comparison", + "description": "This Expectation will check if the results of a query matches the results of a query against another Data Source.\n\nExpectQueryResultsToMatchComparison executes one SQL query for each of two Data Sources and compares their results. It validates that the results from the current Data Source's query matches those from the comparison Data Source's query, above a specified threshold.\n\nEach record returned by the 'base_query' will be compared to each record returned by the 'comparison_query'.\n\nThe maximum number of records that will be returned for comparison from each query is 200.\n\nThe order of records returned does not matter unless the number of records returned would be greater than 200.\n\nColumn names do not matter, but the order of the columns does.\n\nMatch percentage (100% - unexpected percent) is compared to the mostly threshold to determine pass/fail.\n e.g.\nunexpected percent = 10%, mostly = 80%, (100% - 10%) > 80% - pass\nunexpected percent = 10%, mostly = 91%, (100% - 10%) < 91% - fail\n\n\nThe match percentage is computed by dividing the number of matching records by the maximum number of records in either the comparison result or the base result.\n e.g.\nComparison Row Count: 100 Base Row Count: 100 Matches: 100 Match Percentage: 100%\nComparison Row Count: 25 Base Row Count: 100 Matches: 25 Match Percentage: 25%\nComparison Row Count: 100 Base Row Count: 25 Matches: 1 Match Percentage: 1%\n\nIf both the base and comparison queries return 0 records, it is considered a successful result.\n\n\nArgs:\n base_query (str): A SQL query to be executed for this Data Asset.\n comparison_data_source_name (str): The name 
of the comparison Data Source to compare this Asset against.\n comparison_query (str): A SQL query to be executed for the comparison Data Source.\n mostly (float): Successful if at least `mostly` fraction of values match the Expectation.\n\nOther Parameters:\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\nSupported Data Sources:\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\nData Quality Issues:\n Multi-source", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + 
}, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "base_query": { + "title": "Base Query", + "description": "A SQL query to be executed for this Data Asset.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "comparison_data_source_name": { + "title": "Comparison Data Source Name", + "description": "The name of the comparison Data Source to compare this Asset against.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "comparison_query": { + "title": "Comparison Query", + "description": "A SQL query to be executed for the comparison Data Source.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectQueryResultsToMatchComparison" + }, + 
"expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_query_results_to_match_comparison" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Multi-source" + ] + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "This Expectation will check if the results of a query matches the results of a query against another Data Source." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "Snowflake", + "Databricks (SQL)", + "Redshift", + "SQLite" + ] + } + } + } + }, + "required": [ + "base_query", + "comparison_data_source_name", + "comparison_query" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": 
"Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json b/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json new file mode 100644 index 000000000000..5cdfe472ec0d --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json @@ -0,0 +1,462 @@ +{ + "title": "Expect select column values to be unique within record", + "description": "Expect the values for each record to be unique across the columns listed. Note that records can be duplicated.\n\nExpectSelectColumnValuesToBeUniqueWithinRecord is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): The column names to evaluate.\n\nOther Parameters:\n ignore_row_if (str): \"all_values_are_missing\", \"any_value_is_missing\", \"never\" If specified, sets the condition on which a given row is to be ignored. Default \"never\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the Expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL 
Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nFor example:\n::\n\n A B C\n 1 1 2 Fail\n 1 2 3 Pass\n 8 2 7 Pass\n 1 2 3 Pass\n 4 4 4 Fail\nExample Data:\n test test2 test3\n 0 1 1 2\n 1 1 2 3\n 2 8 2 7\n\nCode Examples:\n Passing Case:\n Input:\n ExpectSelectColumnValuesToBeUniqueWithinRecord(\n column_list=[\"test\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectSelectColumnValuesToBeUniqueWithinRecord(\n column_list=[\"test\", \"test2\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 1,\n \"test2\": 1,\n \"test3\": 2\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + 
"type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column_list": { + "title": "Column List", + "description": "The column names to evaluate.", + "type": "array", + "items": { + "type": "string" + } + }, + "mostly": { + "title": "Mostly", + "description": "Successful if at least `mostly` fraction of values match the Expectation.", + "default": 1, + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], + "multipleOf": 0.01 + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + 
"great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "ignore_row_if": { + "title": "Ignore Row If", + "description": "If specified, sets the condition on which a given row is to be ignored.", + "default": "all_values_are_missing", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectSelectColumnValuesToBeUniqueWithinRecord" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_select_column_values_to_be_unique_within_record" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "multicolumn", + "description": "Multicolumn Map" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Uniqueness" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "multi-column expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the values for each record to be unique across the columns listed. Note that records can be duplicated." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "column_list" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + 
} + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": 
"Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json new file mode 100644 index 000000000000..6864ee09ffc2 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json @@ -0,0 +1,267 @@ +{ + "title": "Expect table column count to be between", + "description": "Expect the number of columns in a table to be between two values.\n\nExpectTableColumnCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of columns, inclusive.\n max_value (int or None): The maximum number of columns, inclusive.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable columns has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable columns has no maximum.\n\nSee Also:\n [ExpectTableColumnCountToEqual](https://greatexpectations.io/expectations/expect_table_column_count_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora 
PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnCountToBeBetween(\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableColumnCountToBeBetween(\n min_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": 
"#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum number of columns, inclusive.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum number of columns, inclusive.", + "anyOf": [ + { + "type": "number" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectTableColumnCountToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_table_column_count_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + 
"const": [ + "Schema" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "table expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the number of columns in a table to be between two values." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": 
"integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json new file mode 100644 index 000000000000..00ed17f3ed42 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json @@ -0,0 +1,242 @@ +{ + "title": "Expect table column count to equal", + "description": "Expect the number of columns in a table to equal a value.\n\nExpectTableColumnCountToEqual is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n value (int): The expected number of columns.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableColumnCountToBeBetween](https://greatexpectations.io/expectations/expect_table_column_count_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnCountToEqual(\n value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n 
\"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": true,\n \"result\": {\n \"observed_value\": 2\n }\n }\n\n Failing Case:\n Input:\n ExpectTableColumnCountToEqual(\n value=1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": false,\n \"result\": {\n \"observed_value\": 2\n }\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "value": { + "title": "Value", + "description": "The expected number of columns.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectTableColumnCountToEqual" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_table_column_count_to_equal" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Schema" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "table expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the number of columns in a table to equal a value." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "value" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json new file mode 
100644 index 000000000000..711f4ee0b9ad --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json @@ -0,0 +1,245 @@ +{ + "title": "Expect table columns to match ordered list", + "description": "Expect the columns in a table to exactly match a specified list.\n\nExpectTableColumnsToMatchOrderedList is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column_list (list of str): The column names, in the correct order.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnsToMatchOrderedList(\n column_list=[\"test\", \"test2\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n \"test2\"\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing 
Case:\n Input:\n ExpectTableColumnsToMatchOrderedList(\n column_list=[\"test2\", \"test\", \"test3\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"Unnamed: 0\",\n \"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": [\n {\n \"Expected Column Position\": 1,\n \"Expected\": \"test2\",\n \"Found\": \"test\"\n },\n {\n \"Expected Column Position\": 2,\n \"Expected\": \"test\",\n \"Found\": \"test2\"\n },\n {\n \"Expected Column Position\": 3,\n \"Expected\": \"test3\",\n \"Found\": null\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column_list": { + "title": "Column List", + "description": "The column names, in the correct order.", + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "array", + "items": {}, + "uniqueItems": true + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectTableColumnsToMatchOrderedList" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_table_columns_to_match_ordered_list" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Schema" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "table expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the columns in a table to exactly match a specified list." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json new file mode 100644 index 000000000000..0a950321cc6a --- 
/dev/null +++ b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json @@ -0,0 +1,258 @@ +{ + "title": "Expect table columns to match set", + "description": "Expect the columns in a table to match an unordered set.\n\nExpectTableColumnsToMatchSet is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column_set (list of str): The column names, in any order. In SQL datasources, if the column names are double quoted, for example '\"column_name\"', a case sensitive match is done. Otherwise a case insensitive match is done.\n exact_match (boolean): If True, the list of columns must exactly match the observed columns. If False, observed columns must include column_set but additional columns will pass. Default True.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnsToMatchSet(\n column_set=[\"test\"],\n exact_match=False\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 
\"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": {\n \"unexpected\": [\n \"test2\"\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableColumnsToMatchSet(\n column_set=[\"test2\", \"test3\"],\n exact_match=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": {\n \"unexpected\": [\n \"test\"\n ],\n \"missing\": [\n \"test3\"\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "column_set": { + "title": "Column Set", + "description": "The column names, in any order. In SQL datasources, if the column names are double quoted, for example '\"column_name\"', a case sensitive match is done. Otherwise a case insensitive match is done.", + "anyOf": [ + { + "type": "array", + "items": {} + }, + { + "type": "array", + "items": {}, + "uniqueItems": true + }, + { + "type": "object" + } + ] + }, + "exact_match": { + "title": "Exact Match", + "description": "If True, the list of columns must exactly match the observed columns. If False, observed columns must include column_set but additional columns will pass.", + "default": true, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectTableColumnsToMatchSet" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_table_columns_to_match_set" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Schema" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "table expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": 
true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the columns in a table to match an unordered set." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + } + } +} diff --git 
a/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json new file mode 100644 index 000000000000..fcdfaaa3d77d --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json @@ -0,0 +1,480 @@ +{ + "title": "Expect table row count to be between", + "description": "Expect the number of rows to be between two values.\n\nExpectTableRowCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of rows, inclusive.\n max_value (int or None): The maximum number of rows, inclusive.\n strict_min (boolean): If True, the row count must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the row count must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality 
Issues:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n min_value=1,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "min_value": { + "title": "Min Value", + "description": "The minimum number of rows, inclusive.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "max_value": { + "title": "Max Value", + "description": "The maximum number of rows, inclusive.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "object" + }, + { + "type": "string", + "format": "date-time" + } + ] + }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the row count must be strictly larger than min_value, default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the row count must be strictly smaller than max_value, default=False", + "default": false, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object" + } + ] + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": 
"Expectation Class", + "type": "string", + "const": "ExpectTableRowCountToBeBetween" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_table_row_count_to_be_between" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Volume" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "table expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the number of rows to be between two values." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + 
"additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { 
+ "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json new file mode 100644 index 000000000000..4099df3ee2ab --- /dev/null +++ 
b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json @@ -0,0 +1,437 @@ +{ + "title": "Expect table row count to equal", + "description": "Expect the number of rows to equal a value.\n\nExpectTableRowCountToEqual is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n value (int): The expected number of rows.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. 
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToEqual(\n value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 
3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToEqual(\n value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. 
Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "value": { + "title": "Value", + "description": "The expected number of rows.", + "anyOf": [ + { + "type": "integer" + }, + { + "type": "object" + } + ] + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectTableRowCountToEqual" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_table_row_count_to_equal" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Volume" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": "production", + "tags": [ + "core expectation", + "table expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + 
"title": "Short Description", + "type": "string", + "const": "Expect the number of rows to equal a value." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "MySQL", + "SQL Server", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "Redshift" + ] + } + } + } + }, + "required": [ + "value" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition 
statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR 
condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json new file mode 100644 index 000000000000..5a66ca68c5d7 --- /dev/null +++ b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json @@ -0,0 +1,435 @@ +{ + "title": "Expect table row count to equal other table", + "description": "Expect the number of rows to equal the number in another table within the same database.\n\nExpectTableRowCountToEqualOtherTable is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n other_table_name (str): The name of the other table. Other table must be located within the same database.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions. For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between)\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQL Server](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Volume\n\nExample Data:\n test_table\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\n test_table_two\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\n test_table_three\n test test2\n 0 1.00 2\n 1 2.30 5\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToEqualOtherTable(\n other_table_name=test_table_two\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToEqualOtherTable(\n other_table_name=test_table_three\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": "Rendered Content", 
+ "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "other_table_name": { + "title": "Other Table Name", + "description": "The name of the other table. Other table must be located within the same database.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "row_condition": { + "title": "Row Condition", + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ComparisonCondition" + }, + { + "$ref": "#/definitions/NullityCondition" + }, + { + "$ref": "#/definitions/AndCondition" + }, + { + "$ref": "#/definitions/OrCondition" + }, + { + "$ref": "#/definitions/PassThroughCondition" + } + ] + }, + "condition_parser": { + "title": "Condition Parser", + "enum": [ + "great_expectations", + "great_expectations__experimental__", + "pandas", + "spark" + ], + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "ExpectTableRowCountToEqualOtherTable" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "expect_table_row_count_to_equal_other_table" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "Volume" + ] + }, + "library_metadata": { + "title": "Library Metadata", + "type": "object", + "const": { + "maturity": 
"production", + "tags": [ + "core expectation", + "table expectation", + "multi-table expectation" + ], + "contributors": [ + "@great_expectations" + ], + "requirements": [], + "has_full_test_suite": true, + "manually_reviewed_code": true + } + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "Expect the number of rows to equal the number in another table within the same database." + }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "SQLite", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "Redshift", + "MySQL", + "SQL Server", + "Databricks (SQL)", + "Snowflake" + ] + } + } + } + }, + "required": [ + "other_table_name" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { 
+ "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + }, + "Column": { + "title": "Column", + "description": "--Public API--\nSpecify the column in a condition statement.", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Operator": { + "title": "Operator", + "description": "An enumeration.", + "enum": [ + "==", + "!=", + "<", + "<=", + ">", + ">=", + "IN", + "NOT_IN" + ], + "type": "string" + }, + "ComparisonCondition": { + "title": "ComparisonCondition", + "description": "--Public API--Condition representing the comparison of a column with a parameter.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "comparison", + "enum": [ + "comparison" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "operator": { + "$ref": "#/definitions/Operator" + }, + "parameter": { + "title": "Parameter" + } + }, + "required": [ + "column", + "operator", + "parameter" + ] + }, + "NullityCondition": { + "title": "NullityCondition", + "description": "--Public API--Condition representing the whether or not a column is null.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "nullity", + "enum": [ + "nullity" + ], + "type": "string" + }, + "column": { + "$ref": "#/definitions/Column" + }, + "is_null": { + "title": "Is Null", + "type": "boolean" + } + }, + "required": [ + "column", + "is_null" + ] + }, + "Condition": { + "title": "Condition", + "description": "Base class for conditions.", + "type": "object", + "properties": {} + }, + "AndCondition": { + "title": "AndCondition", + "description": "--Public API--Represents an AND condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "and", + "enum": [ + "and" 
+ ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "OrCondition": { + "title": "OrCondition", + "description": "--Public API--Represents an OR condition composed of multiple conditions.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "or", + "enum": [ + "or" + ], + "type": "string" + }, + "conditions": { + "title": "Conditions", + "type": "array", + "items": { + "$ref": "#/definitions/Condition" + } + } + }, + "required": [ + "conditions" + ] + }, + "PassThroughCondition": { + "title": "PassThroughCondition", + "description": "Condition that passes a filter string directly to the execution engine.\n\nThis is used for legacy pandas/spark condition_parser syntax where the\nrow_condition string is passed directly to DataFrame.query() or DataFrame.filter().", + "type": "object", + "properties": { + "type": { + "title": "Type", + "default": "pass_through", + "enum": [ + "pass_through" + ], + "type": "string" + }, + "pass_through_filter": { + "title": "Pass Through Filter", + "type": "string" + } + }, + "required": [ + "pass_through_filter" + ] + } + } +} diff --git a/great_expectations/expectations/core/schemas/README.md b/great_expectations/expectations/core/schemas/README.md new file mode 100644 index 000000000000..47746ac1fbbb --- /dev/null +++ b/great_expectations/expectations/core/schemas/README.md @@ -0,0 +1,9 @@ +# Expectation JSON Schemas + +## Specification +Expectation JSON schemas should conform to the [JsonSchema7 interface](https://jsonforms.io/api/core/interfaces/jsonschema7). We ensure this by validating each schema using the python [jsonschema](https://python-jsonschema.readthedocs.io/en/stable/) library (e.g. using `Draft7Validator.check_schema()`). 
+ +## Metadata Property +Properties on the Expectation schemas represent class instance variable definitions except for one special property: `metadata`. + +The `metadata` property is itself an `object` containing many `properties`. The `metadata` `properties` are each defined by a `const` that does not change from one Expectation instance to another. diff --git a/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json b/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json new file mode 100644 index 000000000000..d8d4b0da6e86 --- /dev/null +++ b/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json @@ -0,0 +1,223 @@ +{ + "title": "Custom Expectation with SQL", + "description": "This Expectation will fail validation if the query returns one or more rows. The WHERE clause defines the fail criteria.\n\nUnexpectedRowsExpectations facilitate the execution of SQL or Spark-SQL queries as the core logic for an Expectation. UnexpectedRowsExpectations must implement a `_validate(...)` method containing logic for determining whether data returned by the executed query is successfully validated. One is written by default, but can be overridden.\n\nA successful validation is one where the unexpected_rows_query returns no rows.\n\nUnexpectedRowsExpectation is a [Batch Expectation](https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_batch_expectations).\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n unexpected_rows_query (str): A SQL or Spark-SQL query to be executed for validation.\n\nOther Parameters:\n severity (str or None): The impact of this Expectation failing: critical, warning, or info. Defaults to critical if not set. Severity levels can be used to trigger different alerting patterns and actions.
For more detail, see [failure severity](https://docs.greatexpectations.io/docs/cloud/expectations/expectations_overview/#failure-severity).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\nSupported Data Sources:\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Amazon Aurora PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Citus](https://docs.greatexpectations.io/docs/application_integration_support/)\n [AlloyDB](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Neon](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\nData Quality Issues:\n SQL", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "meta": { + "title": "Meta", + "type": "object" + }, + "notes": { + "title": "Notes", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "result_format": { + "title": "Result Format", + "default": "BASIC", + "anyOf": [ + { + "$ref": "#/definitions/ResultFormat" + }, + { + "type": "object" + } + ] + }, + "description": { + "title": "Description", + "description": "A short description of your Expectation", + "type": "string" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": false, + "type": "boolean" + }, + "rendered_content": { + "title": 
"Rendered Content", + "type": "array", + "items": { + "type": "object" + } + }, + "severity": { + "description": "Indicate the impact of this Expectation failing. Severity levels can be used to trigger different alerting patterns and actions.", + "default": "critical", + "allOf": [ + { + "$ref": "#/definitions/FailureSeverity" + } + ] + }, + "windows": { + "title": "Windows", + "description": "Definition(s) for evaluation of temporal windows", + "type": "array", + "items": { + "$ref": "#/definitions/Window" + } + }, + "batch_id": { + "title": "Batch Id", + "type": "string" + }, + "unexpected_rows_query": { + "title": "Unexpected Rows Query", + "description": "A SQL or Spark-SQL query to be executed for validation.", + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + } + ] + }, + "metadata": { + "type": "object", + "properties": { + "expectation_class": { + "title": "Expectation Class", + "type": "string", + "const": "UnexpectedRowsExpectation" + }, + "expectation_type": { + "title": "Expectation Type", + "type": "string", + "const": "unexpected_rows_expectation" + }, + "domain_type": { + "title": "Domain Type", + "type": "string", + "const": "table", + "description": "Batch" + }, + "data_quality_issues": { + "title": "Data Quality Issues", + "type": "array", + "const": [ + "SQL" + ] + }, + "short_description": { + "title": "Short Description", + "type": "string", + "const": "This Expectation will fail validation if the query returns one or more rows. The WHERE clause defines the fail criteria." 
+ }, + "supported_data_sources": { + "title": "Supported Data Sources", + "type": "array", + "const": [ + "Spark", + "PostgreSQL", + "Amazon Aurora PostgreSQL", + "Citus", + "AlloyDB", + "Neon", + "Redshift", + "MySQL", + "BigQuery", + "Snowflake", + "Databricks (SQL)", + "SQL Server" + ] + } + } + } + }, + "required": [ + "unexpected_rows_query" + ], + "additionalProperties": false, + "definitions": { + "ResultFormat": { + "title": "ResultFormat", + "description": "An enumeration.", + "enum": [ + "BOOLEAN_ONLY", + "BASIC", + "COMPLETE", + "SUMMARY" + ], + "type": "string" + }, + "FailureSeverity": { + "title": "FailureSeverity", + "description": "Severity levels for Expectation failures.", + "enum": [ + "critical", + "warning", + "info" + ], + "type": "string" + }, + "Offset": { + "title": "Offset", + "description": "A threshold in which a metric will be considered passable", + "type": "object", + "properties": { + "positive": { + "title": "Positive", + "type": "number" + }, + "negative": { + "title": "Negative", + "type": "number" + } + }, + "required": [ + "positive", + "negative" + ], + "additionalProperties": false + }, + "Window": { + "title": "Window", + "description": "A definition for a temporal window across <`range`> number of previous invocations", + "type": "object", + "properties": { + "constraint_fn": { + "title": "Constraint Fn", + "type": "string" + }, + "parameter_name": { + "title": "Parameter Name", + "type": "string" + }, + "range": { + "title": "Range", + "type": "integer" + }, + "offset": { + "$ref": "#/definitions/Offset" + }, + "strict": { + "title": "Strict", + "default": false, + "type": "boolean" + } + }, + "required": [ + "constraint_fn", + "parameter_name", + "range", + "offset" + ], + "additionalProperties": false + } + } +} diff --git a/great_expectations/expectations/core/schemas/__init__.py b/great_expectations/expectations/core/schemas/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git 
a/tests/expectations/core/test_core_model_schemas.py b/tests/expectations/core/test_core_model_schemas.py index 1a2a160c3f51..e7441e6f0af2 100644 --- a/tests/expectations/core/test_core_model_schemas.py +++ b/tests/expectations/core/test_core_model_schemas.py @@ -1,11 +1,29 @@ +import json +from pathlib import Path + +import jsonschema import pytest +from jsonschema import Draft7Validator from great_expectations.expectations import core +from great_expectations.expectations.core import schemas from great_expectations.expectations.expectation import MetaExpectation expectation_dictionary = dict(core.__dict__) +@pytest.fixture +def safer_draft_7_validator() -> type[Draft7Validator]: + validator = Draft7Validator + validator.META_SCHEMA = { + **Draft7Validator.META_SCHEMA, + # this ensures that only specified properties are used (e.g. multipleOf, not multiple_of) + # otherwise, the spec says unspecified properties should be ignored + "additionalProperties": False, + } + return validator + + @pytest.mark.unit def test_all_core_model_schemas_are_serializable(): all_models = [ @@ -17,3 +35,34 @@ def test_all_core_model_schemas_are_serializable(): assert len(all_models) > 50 for model in all_models: model.schema_json() + + +@pytest.mark.filesystem # ~4s +def test_schemas_updated(): + all_models = { + cls_name: expectation + for cls_name, expectation in expectation_dictionary.items() + if isinstance(expectation, MetaExpectation) + } + schema_file_paths = Path(schemas.__file__).parent.glob("*.json") + all_schemas = {file_path.stem: file_path.read_text() for file_path in schema_file_paths} + for cls_name, schema in all_schemas.items(): + # converting to dicts for easier comparison on failure + new_schema = json.loads(all_models[cls_name].schema_json()) + old_schema = json.loads(schema) + assert new_schema == old_schema, "json schemas not updated, run `invoke schemas --sync`" + + +@pytest.mark.unit +def test_schemas_valid_spec(safer_draft_7_validator: type[Draft7Validator]):
+ # https://json-schema.org/draft-07 + # https://jsonforms.io/api/core/interfaces/jsonschema7 + schema_file_paths = Path(schemas.__file__).parent.glob("*.json") + for file_path in schema_file_paths: + with open(file_path) as schema_file: + try: + safer_draft_7_validator.check_schema(json.load(schema_file)) + except jsonschema.exceptions.SchemaError as e: + raise AssertionError( + f"Invalid json schema for `{file_path.name}`: {e.message}" + ) from e