-
Notifications
You must be signed in to change notification settings - Fork 5
Fix Number fields store as text #1251
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
BibianaC
wants to merge
1
commit into
live
Choose a base branch
from
bc/num-fields-store-as-text
base: live
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+170
−6
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,161 @@ | ||
| import time | ||
| import os | ||
|
|
||
| from django.test import TestCase, Client | ||
|
|
||
| from dataload.import_to_elasticsearch import import_to_elasticsearch | ||
|
|
||
| prefix = os.path.join(os.path.dirname(__file__), "data") | ||
|
|
||
|
|
||
| class NumericFieldSearchTest(TestCase): | ||
| """ | ||
| Tests for searching numeric fields using range and comparison operators. | ||
| """ | ||
|
|
||
| @classmethod | ||
| def setUpClass(cls): | ||
| super().setUpClass() | ||
| import_to_elasticsearch( | ||
| [ | ||
| os.path.join(prefix, "a002400000KeYdsAAF.json"), | ||
| os.path.join(prefix, "grantnav-20180903134856.json"), | ||
| os.path.join(prefix, "a002400000nO46WAAS.json"), | ||
| ], | ||
| clean=True, | ||
| funders=os.path.join(prefix, "funders.jsonl"), | ||
| recipients=os.path.join(prefix, "recipients.jsonl"), | ||
| ) | ||
| time.sleep(5) | ||
|
|
||
| def setUp(self): | ||
| """ | ||
| This setUp method will be executed before each test method. | ||
| """ | ||
| self.client = Client() | ||
|
|
||
| def test_planned_dates_duration_range_query(self): | ||
| """ | ||
| Test that range queries work on plannedDates.duration field | ||
| """ | ||
|
|
||
| # Query for durations between 12 and 36 months | ||
| r = self.client.get('/search?query=plannedDates.duration%3A+%5B12+TO+36%5D', follow=True) | ||
| self.assertEqual(r.status_code, 200) | ||
|
|
||
| # Should return results | ||
| total_hits = r.context["results"]["hits"]["total"]["value"] | ||
| self.assertGreater(total_hits, 0, "Range query should return results for durations between 12 and 36 months") | ||
|
|
||
| def test_planned_dates_duration_greater_than(self): | ||
| """ | ||
| Test that comparison operators work on plannedDates.duration field | ||
| """ | ||
|
|
||
| # Query for durations greater than 12 months | ||
| r = self.client.get('/search?query=plannedDates.duration%3A%3E12', follow=True) | ||
| self.assertEqual(r.status_code, 200) | ||
| total_hits = r.context["results"]["hits"]["total"]["value"] | ||
| self.assertGreater(total_hits, 0, "Query should return results for durations > 12") | ||
|
|
||
| # Verify all results actually have duration > 12 | ||
| for hit in r.context["results"]["hits"]["hits"]: | ||
| if "plannedDates" in hit["_source"] and hit["_source"]["plannedDates"]: | ||
| for planned_date in hit["_source"]["plannedDates"]: | ||
| if "duration" in planned_date: | ||
| duration = planned_date["duration"] | ||
| # Convert to int if it's a string (Elasticsearch might return it as a string if it's stored as text) | ||
| if isinstance(duration, str): | ||
| duration = int(duration) | ||
| self.assertGreater(duration, 12, | ||
| f"Result has duration {duration} which is not > 12") | ||
|
|
||
| def test_planned_dates_duration_less_than(self): | ||
| """ | ||
| Test that less-than operators work on plannedDates.duration field | ||
| """ | ||
|
|
||
| # Query for durations less than 36 months | ||
| r = self.client.get('/search?query=plannedDates.duration%3A%3C36', follow=True) | ||
| self.assertEqual(r.status_code, 200) | ||
|
|
||
| # Should return results | ||
| total_hits = r.context["results"]["hits"]["total"]["value"] | ||
| self.assertGreater(total_hits, 0, "Query should return results for durations < 36") | ||
|
|
||
| def test_actual_dates_duration_range_query(self): | ||
| """ | ||
| Test that range queries work on actualDates.duration field | ||
| """ | ||
|
|
||
| # Query for durations between 12 and 36 months | ||
| r = self.client.get('/search?query=actualDates.duration%3A+%5B12+TO+36%5D', follow=True) | ||
| self.assertEqual(r.status_code, 200) | ||
|
|
||
| # Should return results if any grants have actual dates | ||
| total_hits = r.context["results"]["hits"]["total"]["value"] | ||
|
|
||
| # This might be 0 if test data doesn't have actualDates with durations | ||
| self.assertIsInstance(total_hits, int, "Query should execute without error") | ||
|
|
||
| def test_recipient_org_latest_income_greater_than(self): | ||
| """ | ||
| Test that comparison operators work on additional_data.recipientOrgInfos.latestIncome field | ||
| """ | ||
|
|
||
| min_income = 100000 | ||
|
|
||
| # Query for organisations with latest income greater than 100,000 | ||
| r = self.client.get(f'/search?query=additional_data.recipientOrgInfos.latestIncome%3A%3E{min_income}', follow=True) | ||
|
|
||
| self.assertEqual(r.status_code, 200) | ||
|
|
||
| # Should return results | ||
| results = r.context["results"] | ||
| total_hits = results["hits"]["total"]["value"] | ||
|
|
||
| self.assertGreater(total_hits, 0, | ||
| "Query should return results for latest income > 100,000") | ||
|
|
||
| # Verify returned latestIncome values are greater than the threshold | ||
| for hit in results["hits"]["hits"]: | ||
| recipient_org_infos = ( | ||
| hit["_source"] | ||
| .get("additional_data", {}) | ||
| .get("recipientOrgInfos", []) | ||
| ) | ||
| for recipient_org in recipient_org_infos: | ||
| if "latestIncome" in recipient_org: | ||
| latest_income = int(recipient_org["latestIncome"]) | ||
| self.assertGreater(latest_income, min_income) | ||
|
|
||
| def test_recipient_org_latest_income_range_query(self): | ||
| """ | ||
| Test that range queries work on additional_data.recipientOrgInfos.latestIncome field | ||
| """ | ||
|
|
||
| min_income = 10000 | ||
| max_income = 500000 | ||
|
|
||
| # Query for organisations with latest income between 10,000 and 500,000 | ||
| r = self.client.get(f'/search?query=additional_data.recipientOrgInfos.latestIncome%3A+%5B{min_income}+TO+{max_income}%5D', | ||
| follow=True) | ||
| self.assertEqual(r.status_code, 200) | ||
|
|
||
| results = r.context["results"] | ||
| total_hits = results["hits"]["total"]["value"] | ||
|
|
||
| self.assertGreater(total_hits, 0, | ||
| "Range query should return results for income between 10,000 and 500,000") | ||
|
|
||
| # Verify returned latestIncome values are between 10,000 and 500,000 | ||
| for hit in results["hits"]["hits"]: | ||
| recipientOrgInfos = ( | ||
| hit["_source"] | ||
| .get("additional_data", {}) | ||
| .get("recipientOrgInfos", []) | ||
| ) | ||
| for recipientOrg in recipientOrgInfos: | ||
| if "latestIncome" in recipientOrg: | ||
| self.assertLessEqual(int(recipientOrg["latestIncome"]), max_income) | ||
| self.assertGreaterEqual(int(recipientOrg["latestIncome"]), min_income) | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we add something here that checks the
latestIncomefields to make sure they match the range? For example,There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In principle such a check could also be added to the income greater than, and the durations extending similarly.