From bc8817fc5feebff0918b492413881f8d4fee1fff Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Sat, 2 Dec 2023 13:06:32 -0500
Subject: [PATCH 1/4] Added speed tests.

Related to https://github.com/TranslatorSRI/NameResolution/issues/113
---
 tests/nameres/test_nameres_from_gsheet.py | 61 +++++++++++++++++++++--
 1 file changed, 57 insertions(+), 4 deletions(-)

diff --git a/tests/nameres/test_nameres_from_gsheet.py b/tests/nameres/test_nameres_from_gsheet.py
index 5b24ad7..baea458 100644
--- a/tests/nameres/test_nameres_from_gsheet.py
+++ b/tests/nameres/test_nameres_from_gsheet.py
@@ -1,3 +1,5 @@
+import logging
+import time
 import urllib.parse
 import requests
 import pytest
@@ -9,6 +11,16 @@
 
 @pytest.mark.parametrize("test_row", gsheet.test_rows)
 def test_label(target_info, test_row, test_category):
+    """
+    :param target_info: The target_info object (really a config object).
+    :param test_row: A test row to be tested.
+    :param test_category: A function that can be called with a category name to determine whether or not a particular
+        category should be tested.
+    :return: The number of queries generated.
+    """
+
+    count_queries = 0
+
     nameres_url = target_info['NameResURL']
     limit = target_info['NameResLimit']
     nameres_xfail_if_in_top = int(target_info['NameResXFailIfInTop'])
@@ -54,6 +66,7 @@ def test_label(target_info, test_row, test_category):
 
             test_summary = f"querying {nameres_url_lookup} with label '{label}' and biolink_type {biolink_class}"
             response = requests.get(nameres_url_lookup, request)
+            count_queries += 1
 
             assert response.ok, f"Could not send request {request} to GET {nameres_url_lookup}: {response}"
             results = response.json()
@@ -74,7 +87,7 @@ def test_label(target_info, test_row, test_category):
                 else:
                     assert expected_id not in all_curies, f"Negative test {test_summary} did not find expected ID {expected_id} in top {limit} results."
 
-                return
+                return count_queries
 
             # There are three possible responses:
             if not results:
@@ -84,22 +97,62 @@ def test_label(target_info, test_row, test_category):
                 pytest.fail(f"No expected CURIE for {test_summary} from {source_info}: best result is {results[0]}")
             elif results[0]['curie'] == expected_id:
                 top_result = results[0]
-                assert top_result['curie'] == expected_id,\
+                assert top_result['curie'] == expected_id, \
                     f"{test_summary} returned expected ID {expected_id} as top result"
 
                 # Additionally, test the biolink_class_exclude field if there is one.
                 if biolink_class_exclude:
-                    assert biolink_class_exclude not in top_result['types'],\
+                    assert biolink_class_exclude not in top_result['types'], \
                         f"Biolink types for {top_result['curie']} are {top_result['types']}, which includes {biolink_class_exclude} which should be excluded."
 
             elif expected_id in all_curies:
                 expected_index = all_curies.index(expected_id)
 
                 fail_message = f"{test_summary} returns {results[0]['curie']} ('{results[0]['label']}') as the " \
-                    f"top result, but {expected_id} is at {expected_index} index."
+                               f"top result, but {expected_id} is at {expected_index} index."
                 if expected_index <= nameres_xfail_if_in_top:
                     pytest.xfail(fail_message)
                 else:
                     pytest.fail(fail_message)
             else:
                 pytest.fail(f"{test_summary} but expected result {expected_id} not found: {results}")
+
+    return count_queries
+
+
+@pytest.mark.parametrize("category_and_expected_times", [
+    # We expect unit tests to run in less than half a second each query and name.
+    {'category': 'Unit Tests', 'expected_time_per_query': 0.5},
+])
+def test_query_rates(target_info, category_and_expected_times):
+    """
+    This is being done in service of https://github.com/TranslatorSRI/NameResolution/issues/113
+
+    To ensure that we can handle 20 simultaneous queries within 10 seconds, we will run a set of
+    rows from the Google Sheet, and measure the rate at which we process those queries.
+
+    :param target_info: The target_info object (really a config object).
+    """
+
+    category = category_and_expected_times['category']
+    rows_to_test = list(filter(lambda row: row.Category == category, gsheet.test_rows))
+    assert len(rows_to_test) > 0, f"Category '{category}' not found in Google Sheet {gsheet}."
+
+    time_started = time.time_ns()
+    count_queries = 0
+    for row in rows_to_test:
+        count_queries += test_label(target_info, row, lambda cat: True)
+    time_ended = time.time_ns()
+    time_taken = time_ended - time_started
+    time_taken_secs = float(time_taken) / 1e+9
+
+    time_per_test_row = time_taken_secs / len(rows_to_test)
+    time_per_query = time_taken_secs / count_queries
+    print(f"NameRes took {time_taken_secs:.3f} seconds to process {len(rows_to_test)} test rows " +
+          f"({time_per_test_row:.3f} seconds/test row, {time_per_query:.3f} seconds/query) on {target_info}")
+
+    assert len(rows_to_test) > 20, f"Categories with fewer than twenty test rows are not likely to be representative."
+    assert count_queries > 20, f"Categories with fewer than twenty queries are not likely to be representative."
+
+    if 'expected_time_per_query' in category_and_expected_times:
+        assert time_per_query < category_and_expected_times['expected_time_per_query']

From 1973e6deac7823b90a5a9a69cad55811f0b670d5 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Sat, 2 Dec 2023 13:17:21 -0500
Subject: [PATCH 2/4] Added NodeNorm normalization rate testing.

---
 tests/nodenorm/test_nodenorm_from_gsheet.py | 54 ++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/tests/nodenorm/test_nodenorm_from_gsheet.py b/tests/nodenorm/test_nodenorm_from_gsheet.py
index f7dcf2d..058af39 100644
--- a/tests/nodenorm/test_nodenorm_from_gsheet.py
+++ b/tests/nodenorm/test_nodenorm_from_gsheet.py
@@ -1,4 +1,5 @@
 import itertools
+import time
 import urllib.parse
 import requests
 import pytest
@@ -10,6 +11,16 @@
 
 @pytest.mark.parametrize("test_row", gsheet.test_rows)
 def test_normalization(target_info, test_row, test_category):
+    """
+    Test normalization on NodeNorm.
+
+    :param target_info: The target information to test.
+    :param test_row: The TestRow to test.
+    :param test_category: A function that accepts a category name and returns whether that category should be tested.
+    :return: The number of queries executed.
+    """
+    count_queries = 0
+
     nodenorm_url = target_info['NodeNormURL']
 
     category = test_row.Category
@@ -49,6 +60,7 @@ def test_normalization(target_info, test_row, test_category):
 
         test_summary = f"Queried {query_id} ({preferred_label}) on {nodenorm_url_lookup}"
         response = requests.get(nodenorm_url_lookup, request)
+        count_queries += 1
 
         assert response.ok, f"Could not send request {request} to GET {nodenorm_url_lookup}: {response}"
         results = response.json()
@@ -85,4 +97,44 @@ def test_normalization(target_info, test_row, test_category):
                                                                 f"found in types: {biolink_types}")
             else:
                 assert biolink_type in set(biolink_types), (f"{test_summary} biolink type {biolink_type} not found in "
-                                                            f"types: {biolink_types}")
\ No newline at end of file
+                                                            f"types: {biolink_types}")
+
+    return count_queries
+
+
+@pytest.mark.parametrize("category_and_expected_times", [
+    # We expect unit tests to take less than 0.2 seconds per query.
+    {'category': 'Unit Tests', 'expected_time_per_query': 0.2},
+])
+def test_normalization_rates(target_info, category_and_expected_times):
+    """
+    This is being done in service of https://github.com/TranslatorSRI/NodeNormalization/issues/205
+
+    To ensure that we can handle 20 simultaneous queries within 10 seconds, we will run a set of
+    rows from the Google Sheet, and measure the rate at which we process those queries.
+
+    :param target_info: The target_info object (really a config object).
+    """
+
+    category = category_and_expected_times['category']
+    rows_to_test = list(filter(lambda row: row.Category == category, gsheet.test_rows))
+    assert len(rows_to_test) > 0, f"Category '{category}' not found in Google Sheet {gsheet}."
+
+    time_started = time.time_ns()
+    count_queries = 0
+    for row in rows_to_test:
+        count_queries += test_normalization(target_info, row, lambda cat: True)
+    time_ended = time.time_ns()
+    time_taken = time_ended - time_started
+    time_taken_secs = float(time_taken) / 1e+9
+
+    time_per_test_row = time_taken_secs / len(rows_to_test)
+    time_per_query = time_taken_secs / count_queries
+    print(f"NodeNorm took {time_taken_secs:.3f} seconds to process {len(rows_to_test)} test rows " +
+          f"({time_per_test_row:.3f} seconds/test row, {time_per_query:.3f} seconds/query) on {target_info}")
+
+    assert len(rows_to_test) > 20, f"Categories with fewer than twenty test rows are not likely to be representative."
+    assert count_queries > 20, f"Categories with fewer than twenty queries are not likely to be representative."
+
+    if 'expected_time_per_query' in category_and_expected_times:
+        assert time_per_query < category_and_expected_times['expected_time_per_query']

From aaacd768cf5a9642094851103dbba71d9c7ebcd9 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Sat, 2 Dec 2023 13:51:11 -0500
Subject: [PATCH 3/4] Added Slow Tests for NameRes.

---
 tests/nameres/test_nameres_from_gsheet.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/nameres/test_nameres_from_gsheet.py b/tests/nameres/test_nameres_from_gsheet.py
index baea458..bf1c73e 100644
--- a/tests/nameres/test_nameres_from_gsheet.py
+++ b/tests/nameres/test_nameres_from_gsheet.py
@@ -123,6 +123,7 @@ def test_label(target_info, test_row, test_category):
 @pytest.mark.parametrize("category_and_expected_times", [
     # We expect unit tests to run in less than half a second each query and name.
     {'category': 'Unit Tests', 'expected_time_per_query': 0.5},
+    {'category': 'Slow Tests', 'expected_time_per_query': 1},
 ])
 def test_query_rates(target_info, category_and_expected_times):
     """

From 15e2784d2be5d665ce5cb664b59cc08860e30127 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Sun, 3 Dec 2023 00:26:22 -0500
Subject: [PATCH 4/4] Commented out slow tests until we can fix them.

---
 tests/nameres/test_nameres_from_gsheet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/nameres/test_nameres_from_gsheet.py b/tests/nameres/test_nameres_from_gsheet.py
index bf1c73e..d1be851 100644
--- a/tests/nameres/test_nameres_from_gsheet.py
+++ b/tests/nameres/test_nameres_from_gsheet.py
@@ -123,7 +123,7 @@ def test_label(target_info, test_row, test_category):
 @pytest.mark.parametrize("category_and_expected_times", [
     # We expect unit tests to run in less than half a second each query and name.
     {'category': 'Unit Tests', 'expected_time_per_query': 0.5},
-    {'category': 'Slow Tests', 'expected_time_per_query': 1},
+    # {'category': 'Slow Tests', 'expected_time_per_query': 1},
 ])
 def test_query_rates(target_info, category_and_expected_times):
     """