Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions scripts/importer/validateData.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -3492,6 +3492,10 @@ class TimelineValidator(Validator):
REQUIRE_COLUMN_ORDER = True
ALLOW_BLANKS = True

def __init__(self, *args, **kwargs):
    """Set up the validator and an empty record of timeline lines seen."""
    super(TimelineValidator, self).__init__(*args, **kwargs)
    # maps the joined contents of a data line to the line number on
    # which that content was first encountered
    self.timeline_entries = dict()

def checkLine(self, data):
super(TimelineValidator, self).checkLine(data)
# TODO check the values
Expand Down Expand Up @@ -3522,6 +3526,17 @@ def checkLine(self, data):
extra={'line_number': self.line_number,
'column_number': col_index + 1,
'cause': value})
# validate the uniqueness of timeline records
timeline_entry = ", ".join(data)
if timeline_entry in self.timeline_entries:
self.logger.error(
'Duplicate entry in timeline data',
extra = {'line_number': self.line_number,
'cause': '%s (already defined on line %d)' % (
timeline_entry,
self.timeline_entries[timeline_entry])})
else:
self.timeline_entries[timeline_entry] = self.line_number

class CancerTypeValidator(Validator):

Expand Down
5 changes: 5 additions & 0 deletions tests/test_data/data_timeline_duplicated.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
PATIENT_ID START_DATE STOP_DATE EVENT_TYPE SPECIMEN_SITE SPECIMEN_TYPE SOURCE
TCGA-BH-A18K 20 60 SPECIMEN test_specimen_site_1 test_specimen_type test_source_3
TCGA-BH-A18K 20 60 SPECIMEN test_specimen_site_1 test_specimen_type test_source_3
TCGA-BH-A18K 10 20 STATUS test_source_4
TCGA-BH-NEW 100 200 STATUS test_source_1
13 changes: 13 additions & 0 deletions tests/unit_tests_validate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,19 @@ def test_start_date_validation_TimelineValidator(self):
self.assertEqual("ERROR", error.levelname)
self.assertIn("Invalid START_DATE", error.getMessage())

def test_duplicated_timeline_lines(self):
    """Expect exactly one error for a timeline file with a repeated line."""
    # only records logged at ERROR level or above matter for this case
    self.logger.setLevel(logging.ERROR)
    logged_records = self.validate(
        'data_timeline_duplicated.txt',
        validateData.TimelineValidator)
    self.assertEqual(1, len(logged_records))
    expected_message = "Duplicate entry in timeline data"
    for record in logged_records:
        self.assertEqual("ERROR", record.levelname)
        self.assertIn(expected_message, record.getMessage())



# TODO: make tests in this testcase check the number of properly defined types
class CancerTypeFileValidationTestCase(DataFileTestCase):
Expand Down
Loading