From e59b8e4c8c07bc388d1f2c0a0a6a9700f5baf68f Mon Sep 17 00:00:00 2001
From: Carl Wilson <carl@openpreservation.org>
Date: Fri, 15 May 2020 00:33:21 +0100
Subject: [PATCH 1/6] FIX: hanging stream identification #189 - added a simple
 test, with no assert, for file identification; and - added a similar test for
 stream identification, which demonstrates the hang.

---
 tests/test_fido.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/tests/test_fido.py b/tests/test_fido.py
index 952a588e..99ef971d 100644
--- a/tests/test_fido.py
+++ b/tests/test_fido.py
@@ -1,13 +1,50 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+from __future__ import print_function
 
+import io
+import tempfile
 from time import sleep
 
+from fido import fido
 from fido.fido import PerfTimer
 
+# Magic number for fmt/1000.
+MAGIC = b"\x5A\x58\x54\x61\x70\x65\x21\x1A\x01"
 
 def test_perf_timer():
     timer = PerfTimer()
     sleep(3.6)
     duration = timer.duration()
     assert duration > 0
+
+def test_file_identification():
+    """Reference for Fido-based format identification
+        1. Create a byte-stream with a known magic number and serialise to tempfile.
+        2. Call identify_file(...) to identify the file against Fido's known formats.
+    """
+    # Create a temporary file on the host operating system.
+    tmp = tempfile.mkstemp()
+    tmp_file = tmp[1]
+
+    # Write to the file our known magic-number.
+    with open(tmp_file, "wb") as new_file:
+        new_file.write(MAGIC)
+
+    # Create a Fido instance and call identify_file. The identify_file function
+    # will create and manage a file for itself.
+    f = fido.Fido()
+    f.identify_file(tmp_file)
+
+def test_stream_identification():
+    """Reference for Fido-based format identification
+        1. Create a byte-stream with a known magic number.
+        2. Call identify_stream(...) to identify the file against Fido's known formats.
+    """
+    # Create the stream object with the known magic-number.
+    fstream = io.BytesIO(MAGIC)
+    # Create a Fido instance and call identify_stream. The identify_stream function
+    # will work on the stream as-is. This could be an open file handle that the
+    # caller is managing for itself.
+    f = fido.Fido()
+    f.identify_stream(fstream, "filename to display", extension=False)

From 14932c5b14a1e0e02f3b9cff094589a2a8d9e0b5 Mon Sep 17 00:00:00 2001
From: Carl Wilson <carl@openpreservation.org>
Date: Fri, 15 May 2020 02:00:33 +0100
Subject: [PATCH 2/6] FIX: Stream identification hang #189 - fixed termination
 condition in `blocking_read`.

---
 fido/fido.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fido/fido.py b/fido/fido.py
index a4cdf3f5..97785cb0 100755
--- a/fido/fido.py
+++ b/fido/fido.py
@@ -512,10 +512,11 @@ def blocking_read(self, file, bytes_to_read):
         buffer = b''
         while bytes_read < bytes_to_read:
             readbuffer = file.read(bytes_to_read - bytes_read)
+            last_read_len = len(readbuffer)
             buffer += readbuffer
-            bytes_read = len(buffer)
-            # break out if EOF is reached.
-            if readbuffer == '':
+            bytes_read += last_read_len
+            # break out if EOF is reached, that is zero bytes read.
+            if last_read_len < 1:
                 break
         return buffer
 

From 18de5c0aa799ccd825f330a21055c930d1e4ad0b Mon Sep 17 00:00:00 2001
From: Ross Spencer <all.along.the.watchtower2001@gmail.com>
Date: Sun, 24 May 2020 22:56:26 -0400
Subject: [PATCH 3/6] Use pytest tmp_path

---
 tests/test_fido.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/tests/test_fido.py b/tests/test_fido.py
index 99ef971d..a9b78308 100644
--- a/tests/test_fido.py
+++ b/tests/test_fido.py
@@ -3,7 +3,6 @@
 from __future__ import print_function
 
 import io
-import tempfile
 from time import sleep
 
 from fido import fido
@@ -18,23 +17,19 @@ def test_perf_timer():
     duration = timer.duration()
     assert duration > 0
 
-def test_file_identification():
+def test_file_identification(tmp_path):
     """Reference for Fido-based format identification
         1. Create a byte-stream with a known magic number and serialise to tempfile.
         2. Call identify_file(...) to identify the file against Fido's known formats.
     """
-    # Create a temporary file on the host operating system.
-    tmp = tempfile.mkstemp()
-    tmp_file = tmp[1]
-
-    # Write to the file our known magic-number.
-    with open(tmp_file, "wb") as new_file:
-        new_file.write(MAGIC)
+    # Create a temporary file and write our skeleton file out to it.
+    tmp_file = tmp_path / "tmp_file"
+    tmp_file.write_bytes(MAGIC)
 
     # Create a Fido instance and call identify_file. The identify_file function
     # will create and manage a file for itself.
     f = fido.Fido()
-    f.identify_file(tmp_file)
+    f.identify_file(str(tmp_file))
 
 def test_stream_identification():
     """Reference for Fido-based format identification

From 1b368b2159cacf9644c1fede6469df6fab40090a Mon Sep 17 00:00:00 2001
From: Ross Spencer <all.along.the.watchtower2001@gmail.com>
Date: Sun, 24 May 2020 22:56:53 -0400
Subject: [PATCH 4/6] Use Black formatting

---
 tests/test_fido.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_fido.py b/tests/test_fido.py
index a9b78308..b315ac81 100644
--- a/tests/test_fido.py
+++ b/tests/test_fido.py
@@ -11,12 +11,14 @@
 # Magic number for fmt/1000.
 MAGIC = b"\x5A\x58\x54\x61\x70\x65\x21\x1A\x01"
 
+
 def test_perf_timer():
     timer = PerfTimer()
     sleep(3.6)
     duration = timer.duration()
     assert duration > 0
 
+
 def test_file_identification(tmp_path):
     """Reference for Fido-based format identification
         1. Create a byte-stream with a known magic number and serialise to tempfile.
@@ -31,6 +33,7 @@ def test_file_identification(tmp_path):
     f = fido.Fido()
     f.identify_file(str(tmp_file))
 
+
 def test_stream_identification():
     """Reference for Fido-based format identification
         1. Create a byte-stream with a known magic number.

From 3b9e73f0bbd40590575176d137cd2e8e11cbb6a8 Mon Sep 17 00:00:00 2001
From: Ross Spencer <all.along.the.watchtower2001@gmail.com>
Date: Sun, 24 May 2020 22:57:27 -0400
Subject: [PATCH 5/6] Consume FIDO result as CSV and make assertions

Bring in a CSV reader to consume the FIDO output and make various
assertions about the result's validity.
---
 tests/test_fido.py | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/tests/test_fido.py b/tests/test_fido.py
index b315ac81..6de9eb47 100644
--- a/tests/test_fido.py
+++ b/tests/test_fido.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import print_function
 
+import csv
 import io
 from time import sleep
 
@@ -11,6 +12,12 @@
 # Magic number for fmt/1000.
 MAGIC = b"\x5A\x58\x54\x61\x70\x65\x21\x1A\x01"
 
+# Expected positive PUID.
+PUID = "fmt/1000"
+
+# Expected result.
+OK = "OK"
+
 
 def test_perf_timer():
     timer = PerfTimer()
@@ -19,9 +26,9 @@ def test_perf_timer():
     assert duration > 0
 
 
-def test_file_identification(tmp_path):
+def test_file_identification(tmp_path, capsys):
     """Reference for Fido-based format identification
-        1. Create a byte-stream with a known magic number and serialise to tempfile.
+        1. Create a byte-stream with a known magic number and serialize to tempfile.
         2. Call identify_file(...) to identify the file against Fido's known formats.
     """
     # Create a temporary file and write our skeleton file out to it.
@@ -33,16 +40,39 @@ def test_file_identification(tmp_path):
     f = fido.Fido()
     f.identify_file(str(tmp_file))
 
+    # Capture the stdout returned by Fido and make assertions about its
+    # validity.
+    captured = capsys.readouterr()
+    assert captured.err == ""
+    reader = csv.reader(io.StringIO(captured.out), delimiter=",")
+    assert reader is not None
+    row = next(reader)
+    assert row[0] == OK, "row hasn't returned a positive identification"
+    assert row[2] == PUID, "row doesn't contain expected PUID value"
+    assert int(row[5]) == len(MAGIC), "row doesn't contain stream length"
+
 
-def test_stream_identification():
+def test_stream_identification(capsys):
     """Reference for Fido-based format identification
         1. Create a byte-stream with a known magic number.
         2. Call identify_stream(...) to identify the file against Fido's known formats.
     """
     # Create the stream object with the known magic-number.
     fstream = io.BytesIO(MAGIC)
+
     # Create a Fido instance and call identify_stream. The identify_stream function
     # will work on the stream as-is. This could be an open file handle that the
     # caller is managing for itself.
     f = fido.Fido()
     f.identify_stream(fstream, "filename to display", extension=False)
+
+    # Capture the stdout returned by Fido and make assertions about its
+    # validity.
+    captured = capsys.readouterr()
+    assert captured.err == ""
+    reader = csv.reader(io.StringIO(captured.out), delimiter=",")
+    assert reader is not None
+    row = next(reader)
+    assert row[0] == OK, "row hasn't returned a positive identification"
+    assert row[2] == PUID, "row doesn't contain expected PUID value"
+    assert int(row[5]) == len(MAGIC), "row doesn't contain stream length"

From 1daddea89a313aebfc0866b09b1155e00074fb9d Mon Sep 17 00:00:00 2001
From: Ross Spencer <all.along.the.watchtower2001@gmail.com>
Date: Sun, 24 May 2020 23:22:48 -0400
Subject: [PATCH 6/6] Correct Flake8 E225 missing whitespace warning

---
 fido/fido.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fido/fido.py b/fido/fido.py
index 97785cb0..dbcc2103 100755
--- a/fido/fido.py
+++ b/fido/fido.py
@@ -251,7 +251,7 @@ def get_signatures(self, format):
         return format.findall('signature')
 
     def has_priority_over(self, format, possibly_inferior):
-        return self.get_puid(possibly_inferior)in self.puid_has_priority_over_map[self.get_puid(format)]
+        return self.get_puid(possibly_inferior) in self.puid_has_priority_over_map[self.get_puid(format)]
 
     def get_puid(self, format):
         return format.find('puid').text