diff --git a/dataload/rdf2json/dependency-reduced-pom.xml b/dataload/rdf2json/dependency-reduced-pom.xml index 848ef35fc..577ca8460 100644 --- a/dataload/rdf2json/dependency-reduced-pom.xml +++ b/dataload/rdf2json/dependency-reduced-pom.xml @@ -53,6 +53,26 @@ pom compile + + org.junit.jupiter + junit-jupiter + 5.10.2 + test + + + junit-jupiter-api + org.junit.jupiter + + + junit-jupiter-params + org.junit.jupiter + + + junit-jupiter-engine + org.junit.jupiter + + + 11 diff --git a/dataload/rdf2json/pom.xml b/dataload/rdf2json/pom.xml index 51184aac1..1f7c24d3c 100644 --- a/dataload/rdf2json/pom.xml +++ b/dataload/rdf2json/pom.xml @@ -45,6 +45,13 @@ 1.4.11 + + + org.junit.jupiter + junit-jupiter + 5.10.2 + test + com.sun.mail @@ -64,6 +71,7 @@ commons-cli commons-cli 1.4 + diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java index fa4f2002f..bb7a9cc56 100644 --- a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java @@ -12,6 +12,7 @@ public class ShortFormAnnotator { private static final Logger logger = LoggerFactory.getLogger(ShortFormAnnotator.class); + private static final String oboPurlPrefix = "http://purl.obolibrary.org/obo/"; public static void annotateShortForms(OntologyGraph graph) { @@ -36,7 +37,7 @@ public static void annotateShortForms(OntologyGraph graph) { preferredPrefix = graph.config.get("id").toString().toUpperCase(); } - String shortForm = extractShortForm(graph, ontologyBaseUris, preferredPrefix, c.uri); + String shortForm = extractShortForm(ontologyBaseUris, preferredPrefix, c.uri); /* CURIEs are formed by following rules: @@ -75,16 +76,26 @@ public static void annotateShortForms(OntologyGraph graph) { } - private static String extractShortForm(OntologyGraph graph, Set ontologyBaseUris, String preferredPrefix, + public static String extractShortForm(Set ontologyBaseUris, String preferredPrefix, String uri) { if (uri.startsWith("urn:")) { return uri.substring(4); } - // if(uri.startsWith("http://purl.obolibrary.org/obo/")) { - // return uri.substring("http://purl.obolibrary.org/obo/".length()); - // } + // Check if it's an ad-hoc member of an OBO space, for example, how + // http://obolibrary.org/obo/mesh#C is an ad-hoc member of the OBO + // mesh space or http://purl.obolibrary.org/obo/chebi#mass as an ad-hoc + // member of the OBO chebi space (not the same as the CHEBI space). + // + // For example, http://purl.obolibrary.org/obo/chebi#mass becomes + // obo:chebi#mass + // + // See further discussion at https://github.com/EBISPOT/ols4/issues/935. + String oboAdHocURIPrefix = oboPurlPrefix + preferredPrefix.toLowerCase() + "#"; + if (uri.startsWith(oboAdHocURIPrefix)) { + return "obo:" + uri.substring(oboPurlPrefix.length()); + } for (String baseUri : ontologyBaseUris) { if (uri.startsWith(baseUri) && preferredPrefix != null) { diff --git a/dataload/rdf2json/src/test/java/TestShortFormAnnotator.java b/dataload/rdf2json/src/test/java/TestShortFormAnnotator.java new file mode 100644 index 000000000..95b553761 --- /dev/null +++ b/dataload/rdf2json/src/test/java/TestShortFormAnnotator.java @@ -0,0 +1,41 @@ +import org.junit.jupiter.api.Test; +import static uk.ac.ebi.rdf2json.annotators.ShortFormAnnotator.extractShortForm; +import java.util.*; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestShortFormAnnotator { + static Set uriPrefixes = new HashSet<>() {{ + add("http://purl.obolibrary.org/obo/TEST_"); + }}; + + @Test + public void testURN() { + // Test slicing off front of URN + assertEquals("garbage", extractShortForm(uriPrefixes, "TEST", "urn:garbage")); + } + + @Test + public void testOBODefaultNamespace() { + // Test OBO default namespaces (added in https://github.com/EBISPOT/ols4/pull/937) + assertEquals("obo:test#abc", extractShortForm(uriPrefixes, "TEST", "http://purl.obolibrary.org/obo/test#abc")); + + // Reverts to the guess workflow + assertEquals("abc", extractShortForm(uriPrefixes, "XXX", "http://purl.obolibrary.org/obo/test#abc")); + } + + @Test + public void testRegular() { + // Test regular parsing based, note that it takes the prefix given + assertEquals("XXX_1234567", extractShortForm(uriPrefixes, "XXX", "http://purl.obolibrary.org/obo/TEST_1234567")); + assertEquals("TEST_1234567", extractShortForm(uriPrefixes, "TEST", "http://purl.obolibrary.org/obo/TEST_1234567")); + } + + @Test + public void testGuesses() { + // Guesses + assertEquals("1234567", extractShortForm(uriPrefixes, "XXX", "http://example.org/1234567")); + assertEquals("1234567", extractShortForm(uriPrefixes, "TEST", "http://example.org/1234567")); + assertEquals("1234567", extractShortForm(uriPrefixes, "XXX", "http://example.org/any.html#1234567")); + assertEquals("1234567", extractShortForm(uriPrefixes, "TEST", "http://example.org/any.html#1234567")); + } +}