From e59b8e4c8c07bc388d1f2c0a0a6a9700f5baf68f Mon Sep 17 00:00:00 2001 From: Carl Wilson Date: Fri, 15 May 2020 00:33:21 +0100 Subject: [PATCH 1/6] FIX: hanging stream identification #189 - added simple test, with no assert, for file identification; and - added similar for stream identification which demonstrates hang. --- tests/test_fido.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/test_fido.py b/tests/test_fido.py index 952a588e..99ef971d 100644 --- a/tests/test_fido.py +++ b/tests/test_fido.py @@ -1,13 +1,50 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from __future__ import print_function +import io +import tempfile from time import sleep +from fido import fido from fido.fido import PerfTimer +# Magic number for fmt/1000. +MAGIC = b"\x5A\x58\x54\x61\x70\x65\x21\x1A\x01" def test_perf_timer(): timer = PerfTimer() sleep(3.6) duration = timer.duration() assert duration > 0 + +def test_file_identification(): + """Reference for Fido-based format identification + 1. Create a byte-stream with a known magic number and serialise to tempfile. + 2. Call identify_file(...) to identify the file against Fido's known formats. + """ + # Create a temporary file on the host operating system. + tmp = tempfile.mkstemp() + tmp_file = tmp[1] + + # Write to the file our known magic-number. + with open(tmp_file, "wb") as new_file: + new_file.write(MAGIC) + + # Create a Fido instance and call identify_file. The identify_file function + # will create and manage a file for itself. + f = fido.Fido() + f.identify_file(tmp_file) + +def test_stream_identification(): + """Reference for Fido-based format identification + 1. Create a byte-stream with a known magic number. + 2. Call identify_stream(...) to identify the file against Fido's known formats. + """ + # Create the stream object with the known magic-number. + fstream = io.BytesIO(MAGIC) + # Create a Fido instance and call identify_stream. The identify_stream function + # will work on the stream as-is. This could be an open file handle that the + # caller is managing for itself. + f = fido.Fido() + f.identify_stream(fstream, "filename to display", extension=False) From 14932c5b14a1e0e02f3b9cff094589a2a8d9e0b5 Mon Sep 17 00:00:00 2001 From: Carl Wilson Date: Fri, 15 May 2020 02:00:33 +0100 Subject: [PATCH 2/6] FIX: Stream identification hang #189 - fixed termination condition in `blocking_read`. --- fido/fido.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fido/fido.py b/fido/fido.py index a4cdf3f5..97785cb0 100755 --- a/fido/fido.py +++ b/fido/fido.py @@ -512,10 +512,11 @@ def blocking_read(self, file, bytes_to_read): buffer = b'' while bytes_read < bytes_to_read: readbuffer = file.read(bytes_to_read - bytes_read) + last_read_len = len(readbuffer) buffer += readbuffer - bytes_read = len(buffer) - # break out if EOF is reached. - if readbuffer == '': + bytes_read += last_read_len + # break out if EOF is reached, that is zero bytes read. + if last_read_len < 1: break return buffer From 18de5c0aa799ccd825f330a21055c930d1e4ad0b Mon Sep 17 00:00:00 2001 From: Ross Spencer Date: Sun, 24 May 2020 22:56:26 -0400 Subject: [PATCH 3/6] Use pytest tmp_path --- tests/test_fido.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/test_fido.py b/tests/test_fido.py index 99ef971d..a9b78308 100644 --- a/tests/test_fido.py +++ b/tests/test_fido.py @@ -3,7 +3,6 @@ from __future__ import print_function import io -import tempfile from time import sleep from fido import fido @@ -18,23 +17,19 @@ def test_perf_timer(): duration = timer.duration() assert duration > 0 -def test_file_identification(): +def test_file_identification(tmp_path): """Reference for Fido-based format identification 1. Create a byte-stream with a known magic number and serialise to tempfile. 2. Call identify_file(...) to identify the file against Fido's known formats. """ - # Create a temporary file on the host operating system. - tmp = tempfile.mkstemp() - tmp_file = tmp[1] - - # Write to the file our known magic-number. - with open(tmp_file, "wb") as new_file: - new_file.write(MAGIC) + # Create a temporary file and write our skeleton file out to it. + tmp_file = tmp_path / "tmp_file" + tmp_file.write_bytes(MAGIC) # Create a Fido instance and call identify_file. The identify_file function # will create and manage a file for itself. f = fido.Fido() - f.identify_file(tmp_file) + f.identify_file(str(tmp_file)) def test_stream_identification(): """Reference for Fido-based format identification From 1b368b2159cacf9644c1fede6469df6fab40090a Mon Sep 17 00:00:00 2001 From: Ross Spencer Date: Sun, 24 May 2020 22:56:53 -0400 Subject: [PATCH 4/6] Use Black formatting --- tests/test_fido.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_fido.py b/tests/test_fido.py index a9b78308..b315ac81 100644 --- a/tests/test_fido.py +++ b/tests/test_fido.py @@ -11,12 +11,14 @@ # Magic number for fmt/1000. MAGIC = b"\x5A\x58\x54\x61\x70\x65\x21\x1A\x01" + def test_perf_timer(): timer = PerfTimer() sleep(3.6) duration = timer.duration() assert duration > 0 + def test_file_identification(tmp_path): """Reference for Fido-based format identification 1. Create a byte-stream with a known magic number and serialise to tempfile. @@ -31,6 +33,7 @@ def test_file_identification(tmp_path): f = fido.Fido() f.identify_file(str(tmp_file)) + def test_stream_identification(): """Reference for Fido-based format identification 1. Create a byte-stream with a known magic number. From 3b9e73f0bbd40590575176d137cd2e8e11cbb6a8 Mon Sep 17 00:00:00 2001 From: Ross Spencer Date: Sun, 24 May 2020 22:57:27 -0400 Subject: [PATCH 5/6] Consume FIDO result as CSV and make assertions Bring in a CSV reader to consume the FIDO output and make various assertions about the result's validity. --- tests/test_fido.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/tests/test_fido.py b/tests/test_fido.py index b315ac81..6de9eb47 100644 --- a/tests/test_fido.py +++ b/tests/test_fido.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- from __future__ import print_function +import csv import io from time import sleep @@ -11,6 +12,12 @@ # Magic number for fmt/1000. MAGIC = b"\x5A\x58\x54\x61\x70\x65\x21\x1A\x01" +# Expected positive PUID. +PUID = "fmt/1000" + +# Expected result. +OK = "OK" + def test_perf_timer(): timer = PerfTimer() @@ -19,9 +26,9 @@ def test_perf_timer(): assert duration > 0 -def test_file_identification(tmp_path): +def test_file_identification(tmp_path, capsys): """Reference for Fido-based format identification - 1. Create a byte-stream with a known magic number and serialise to tempfile. + 1. Create a byte-stream with a known magic number and serialize to tempfile. 2. Call identify_file(...) to identify the file against Fido's known formats. """ # Create a temporary file and write our skeleton file out to it. @@ -33,16 +40,39 @@ def test_file_identification(tmp_path): f = fido.Fido() f.identify_file(str(tmp_file)) + # Capture the stdout returned by Fido and make assertions about its + # validity. + captured = capsys.readouterr() + assert captured.err == "" + reader = csv.reader(io.StringIO(captured.out), delimiter=",") + assert reader is not None + row = next(reader) + assert row[0] == OK, "row hasn't returned a positive identification" + assert row[2] == PUID, "row doesn't contain expected PUID value" + assert int(row[5]) == len(MAGIC), "row doesn't contain stream length" + -def test_stream_identification(): +def test_stream_identification(capsys): """Reference for Fido-based format identification 1. Create a byte-stream with a known magic number. 2. Call identify_stream(...) to identify the file against Fido's known formats. """ # Create the stream object with the known magic-number. fstream = io.BytesIO(MAGIC) + # Create a Fido instance and call identify_stream. The identify_stream function # will work on the stream as-is. This could be an open file handle that the # caller is managing for itself. f = fido.Fido() f.identify_stream(fstream, "filename to display", extension=False) + + # Capture the stdout returned by Fido and make assertions about its + # validity. + captured = capsys.readouterr() + assert captured.err == "" + reader = csv.reader(io.StringIO(captured.out), delimiter=",") + assert reader is not None + row = next(reader) + assert row[0] == OK, "row hasn't returned a positive identification" + assert row[2] == PUID, "row doesn't contain expected PUID value" + assert int(row[5]) == len(MAGIC), "row doesn't contain stream length" From 1daddea89a313aebfc0866b09b1155e00074fb9d Mon Sep 17 00:00:00 2001 From: Ross Spencer Date: Sun, 24 May 2020 23:22:48 -0400 Subject: [PATCH 6/6] Correct Flake8 E225 missing whitespace warning --- fido/fido.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fido/fido.py b/fido/fido.py index 97785cb0..dbcc2103 100755 --- a/fido/fido.py +++ b/fido/fido.py @@ -251,7 +251,7 @@ def get_signatures(self, format): return format.findall('signature') def has_priority_over(self, format, possibly_inferior): - return self.get_puid(possibly_inferior)in self.puid_has_priority_over_map[self.get_puid(format)] + return self.get_puid(possibly_inferior) in self.puid_has_priority_over_map[self.get_puid(format)] def get_puid(self, format): return format.find('puid').text