Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions scripts/importer/validateData.py
Original file line number Diff line number Diff line change
Expand Up @@ -3879,10 +3879,40 @@ def checkLine(self, data):
'column_number': col_index + 1,
'cause': value})

if col_name == 'PRIORITY':
if value.strip() and value.strip().lower() not in self.NULL_VALUES:
try:
int(value.strip())
except ValueError:
self.logger.error(
'wrong value of PRIORITY, the value should be an integer',
extra={'line_number': self.line_number,
'column_number': col_index + 1,
'cause': value})

if col_name == 'METADATA':
metadata_value = value.strip()
if metadata_value and metadata_value.lower() not in self.NULL_VALUES:
try:
json.loads(metadata_value)
except json.JSONDecodeError:
self.logger.error(
'Invalid JSON in METADATA column.',
extra={'line_number': self.line_number,
'column_number': col_index + 1,
'cause': value})
except Exception:
self.logger.error(
'Error processing METADATA column.',
extra={'line_number': self.line_number,
'column_number': col_index + 1,
'cause': value})

class SampleResourceValidator(ResourceValidator):
"""Validator for files defining and setting sample-level attributes."""

REQUIRED_HEADERS = ['SAMPLE_ID', 'PATIENT_ID', 'RESOURCE_ID', 'URL']
OPTIONAL_HEADERS = ['DISPLAY_NAME', 'TYPE', 'GROUP_PATH', 'METADATA', 'PRIORITY']

def __init__(self, *args, **kwargs):
"""Initialize a SampleResourceValidator with the given parameters."""
Expand Down Expand Up @@ -3946,6 +3976,7 @@ def checkLine(self, data):
class PatientResourceValidator(ResourceValidator):

REQUIRED_HEADERS = ['PATIENT_ID', 'RESOURCE_ID', 'URL']
OPTIONAL_HEADERS = ['DISPLAY_NAME', 'TYPE', 'GROUP_PATH', 'METADATA', 'PRIORITY']

def __init__(self, *args, **kwargs):
"""Initialize a PatientResourceValidator with the given parameters."""
Expand Down Expand Up @@ -3998,6 +4029,7 @@ def checkLine(self, data):
class StudyResourceValidator(ResourceValidator):

REQUIRED_HEADERS = ['RESOURCE_ID', 'URL']
OPTIONAL_HEADERS = ['DISPLAY_NAME', 'TYPE', 'GROUP_PATH', 'METADATA', 'PRIORITY']

def __init__(self, *args, **kwargs):
"""Initialize a StudyResourceValidator with the given parameters."""
Expand Down
4 changes: 4 additions & 0 deletions tests/test_data/data_resource_patient_invalid_metadata.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
PATIENT_ID RESOURCE_ID URL DISPLAY_NAME TYPE GROUP_PATH METADATA PRIORITY
TCGA-A2-A04P PATIENT_NOTES http://url-to-patient-notes-patient1 Valid JSON CT 2023 {"modality":"CT","slices":120} 0
TCGA-A1-A0SK PATIENT_NOTES http://url-to-patient-notes-patient2 Invalid JSON CT 2023 {"modality":"CT","slices":120,} 0
TCGA-A2-A0CM PATIENT_NOTES http://url-to-patient-notes-patient3 Empty Metadata CT 2023 0
4 changes: 4 additions & 0 deletions tests/test_data/data_resource_patient_invalid_priority.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
PATIENT_ID RESOURCE_ID URL DISPLAY_NAME TYPE GROUP_PATH METADATA PRIORITY
TCGA-A2-A04P PATIENT_NOTES http://url-to-patient-notes-patient1 Valid Priority CT 2023 0
TCGA-A1-A0SK PATIENT_NOTES http://url-to-patient-notes-patient2 Invalid Priority CT 2023 not_an_integer
TCGA-A2-A0CM PATIENT_NOTES http://url-to-patient-notes-patient3 Empty Priority CT 2023
8 changes: 4 additions & 4 deletions tests/test_data/data_resource_patient_valid.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
PATIENT_ID RESOURCE_ID URL
TCGA-A2-A04P PATIENT_NOTES http://url-to-patient-notes-patient1
TCGA-A1-A0SK PATIENT_NOTES http://url-to-patient-notes-patient2
TCGA-A2-A0CM PATIENT_NOTES http://url-to-patient-notes-patient3
PATIENT_ID RESOURCE_ID URL DISPLAY_NAME TYPE GROUP_PATH METADATA PRIORITY
TCGA-A2-A04P PATIENT_NOTES http://url-to-patient-notes-patient1 Biopsy Report PATH_REPORT 2023 0
TCGA-A1-A0SK PATIENT_NOTES http://url-to-patient-notes-patient2 CT Instance CT CT 2023-01-15/Series 1: Axial T2 {"modality":"CT","slices":120} 0
TCGA-A2-A0CM PATIENT_NOTES http://url-to-patient-notes-patient3 0
8 changes: 4 additions & 4 deletions tests/test_data/data_resource_sample_valid.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
PATIENT_ID SAMPLE_ID RESOURCE_ID URL
TCGA-A2-A04P TCGA-A2-A04P-01 PATHOLOGY_SLIDE http://url-to-slide-sample1
TCGA-A1-A0SK TCGA-A1-A0SK-01 PATHOLOGY_SLIDE http://url-to-slide-sample2
TCGA-A2-A0CM TCGA-A2-A0CM-01 PATHOLOGY_SLIDE http://url-to-slide-sample3
PATIENT_ID SAMPLE_ID RESOURCE_ID URL DISPLAY_NAME TYPE GROUP_PATH METADATA PRIORITY
TCGA-A2-A04P TCGA-A2-A04P-01 PATHOLOGY_SLIDE http://url-to-slide-sample1 H&E H_AND_E Block A – Primary Tumor {"stain":"hematoxylin","magnification":"20x"} 0
TCGA-A1-A0SK TCGA-A1-A0SK-01 PATHOLOGY_SLIDE http://url-to-slide-sample2 IHC CD3 IHC Block A – Primary Tumor {"antibody":"CD3","clone":"SP7"} 1
TCGA-A2-A0CM TCGA-A2-A0CM-01 PATHOLOGY_SLIDE http://url-to-slide-sample3 H&E H_AND_E 0
6 changes: 3 additions & 3 deletions tests/test_data/data_resource_study_valid.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
RESOURCE_ID URL
STUDY_SPONSORS http://url-to-study-sponsors1
STUDY_SPONSORS http://url-to-study-sponsors2
RESOURCE_ID URL DISPLAY_NAME TYPE GROUP_PATH METADATA PRIORITY
STUDY_SPONSORS http://url-to-study-sponsors1 Study Sponsors 0
STUDY_SPONSORS http://url-to-study-sponsors2 Study Sponsors 2 1
28 changes: 28 additions & 0 deletions tests/unit_tests_validate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3027,6 +3027,34 @@ def test_study_resource_has_duplication(self):
record = record_list.pop()
self.assertEqual(logging.ERROR, record.levelno)
self.assertIn('Duplicated resources found', record.getMessage())

# optional column tests
def test_patient_resource_invalid_metadata(self):
    """Check that malformed JSON in the METADATA column logs exactly one error.

    The fixture ``data_resource_patient_invalid_metadata.txt`` holds one row
    with valid JSON, one row with a trailing comma inside the JSON object and
    one row with an empty METADATA value; only the malformed row is expected
    to be reported.
    """
    # The validator reads this module-level dictionary, so the test overrides
    # it for the duration of the run.
    validateData.RESOURCE_DEFINITION_DICTIONARY = {'PATIENT_NOTES': ['PATIENT']}
    try:
        self.logger.setLevel(logging.ERROR)
        record_list = self.validate('data_resource_patient_invalid_metadata.txt',
                                    validateData.PatientResourceValidator)

        self.assertEqual(1, len(record_list))
        record = record_list.pop()
        self.assertEqual(logging.ERROR, record.levelno)
        self.assertIn('Invalid JSON in METADATA column', record.getMessage())
        self.assertEqual(record.cause, '{"modality":"CT","slices":120,}')
    finally:
        # Always reset the global, even when an assertion above fails, so the
        # override cannot leak into tests that run afterwards.
        validateData.RESOURCE_DEFINITION_DICTIONARY = {}

def test_patient_resource_invalid_priority(self):
    """Check that a non-integer PRIORITY value logs exactly one error.

    The fixture ``data_resource_patient_invalid_priority.txt`` holds one row
    with PRIORITY ``0``, one row with ``not_an_integer`` and one row with an
    empty PRIORITY; only the non-integer row is expected to be reported.
    """
    # The validator reads this module-level dictionary, so the test overrides
    # it for the duration of the run.
    validateData.RESOURCE_DEFINITION_DICTIONARY = {'PATIENT_NOTES': ['PATIENT']}
    try:
        self.logger.setLevel(logging.ERROR)
        record_list = self.validate('data_resource_patient_invalid_priority.txt',
                                    validateData.PatientResourceValidator)

        self.assertEqual(1, len(record_list))
        record = record_list.pop()
        self.assertEqual(logging.ERROR, record.levelno)
        self.assertIn('wrong value of PRIORITY', record.getMessage())
        self.assertEqual(record.cause, 'not_an_integer')
    finally:
        # Always reset the global, even when an assertion above fails, so the
        # override cannot leak into tests that run afterwards.
        validateData.RESOURCE_DEFINITION_DICTIONARY = {}

# -------------------------- end resource definition wise test ----------------------------

# --------------------------- generic assay test ------------------------------
Expand Down
Loading