diff --git a/jablib/src/main/java/org/jabref/logic/importer/WebFetchers.java b/jablib/src/main/java/org/jabref/logic/importer/WebFetchers.java
index 8d12311362e..3deb6acafaf 100644
--- a/jablib/src/main/java/org/jabref/logic/importer/WebFetchers.java
+++ b/jablib/src/main/java/org/jabref/logic/importer/WebFetchers.java
@@ -39,6 +39,7 @@
 import org.jabref.logic.importer.fetcher.MedlineFetcher;
 import org.jabref.logic.importer.fetcher.Medra;
 import org.jabref.logic.importer.fetcher.OpenAccessDoi;
+import org.jabref.logic.importer.fetcher.OpenAireFetcher;
 import org.jabref.logic.importer.fetcher.OpenAlex;
 import org.jabref.logic.importer.fetcher.ResearchGate;
 import org.jabref.logic.importer.fetcher.RfcFetcher;
@@ -168,6 +169,7 @@ public static synchronized SortedSet getSearchBasedFetchers(
         searchBasedFetchers.add(new SpringerNatureWebFetcher(importerPreferences));
         searchBasedFetchers.add(new CrossRef());
         searchBasedFetchers.add(new OpenAlex(importerPreferences));
+        searchBasedFetchers.add(new OpenAireFetcher());
         searchBasedFetchers.add(new CiteSeer());
         searchBasedFetchers.add(new DOAJFetcher(importFormatPreferences));
         searchBasedFetchers.add(new IEEE(importFormatPreferences, importerPreferences));
diff --git a/jablib/src/main/java/org/jabref/logic/importer/fetcher/OpenAireFetcher.java b/jablib/src/main/java/org/jabref/logic/importer/fetcher/OpenAireFetcher.java
new file mode 100644
index 00000000000..d038f3633e0
--- /dev/null
+++ b/jablib/src/main/java/org/jabref/logic/importer/fetcher/OpenAireFetcher.java
@@ -0,0 +1,210 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+import org.jabref.logic.importer.PagedSearchBasedParserFetcher;
+import org.jabref.logic.importer.ParseException;
+import org.jabref.logic.importer.Parser;
+import org.jabref.logic.importer.fetcher.transformers.DefaultQueryTransformer;
+import org.jabref.logic.importer.util.JsonReader;
+import org.jabref.model.entry.AuthorList;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.types.EntryType;
+import org.jabref.model.entry.types.StandardEntryType;
+import org.jabref.model.search.query.BaseQueryNode;
+
+import kong.unirest.core.json.JSONArray;
+import kong.unirest.core.json.JSONException;
+import kong.unirest.core.json.JSONObject;
+import org.apache.hc.core5.net.URIBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/// Fetcher for the OpenAIRE Graph API.
+///
+/// Sends paged search requests to the `researchProducts` endpoint and converts every
+/// element of the JSON `results` array of the response into a [BibEntry].
+///
+/// @see "OpenAIRE Graph API documentation"
+public class OpenAireFetcher implements PagedSearchBasedParserFetcher {
+
+    public static final String FETCHER_NAME = "OpenAIRE";
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(OpenAireFetcher.class);
+
+    private static final String API_URL = "https://api.openaire.eu/graph/v2/researchProducts";
+
+    @Override
+    public String getName() {
+        return FETCHER_NAME;
+    }
+
+    /// Builds the request URL for one page of search results.
+    ///
+    /// @param queryNode  parsed search query; `search` is only added when the transformer yields a query string
+    /// @param pageNumber page index as passed by the caller; the API receives `pageNumber + 1` as `page`
+    @Override
+    public URL getURLForQuery(BaseQueryNode queryNode, int pageNumber) throws URISyntaxException, MalformedURLException {
+        URIBuilder uriBuilder = new URIBuilder(API_URL);
+        new DefaultQueryTransformer().transformSearchQuery(queryNode).ifPresent(
+                query -> uriBuilder.addParameter("search", query));
+        uriBuilder.addParameter("page", String.valueOf(pageNumber + 1));
+        uriBuilder.addParameter("pageSize", String.valueOf(getPageSize()));
+        uriBuilder.addParameter("sortBy", "relevance DESC");
+        URL result = uriBuilder.build().toURL();
+        LOGGER.debug("URL for query: {}", result);
+        return result;
+    }
+
+    /// Returns a parser that maps the `results` array of a response to entries.
+    ///
+    /// An empty response or one without a `results` key yields an empty list. A `results` value of an
+    /// unexpected shape is reported as a [ParseException] instead of escaping as an unchecked [JSONException].
+    @Override
+    public Parser getParser() {
+        return inputStream -> {
+            JSONObject response = JsonReader.toJsonObject(inputStream);
+            if (response.isEmpty() || !response.has("results")) {
+                return List.of();
+            }
+            try {
+                // getJSONArray/getJSONObject throw JSONException when the value has another type
+                JSONArray results = response.getJSONArray("results");
+                List<BibEntry> entries = new ArrayList<>(results.length());
+                for (int i = 0; i < results.length(); i++) {
+                    JSONObject item = results.getJSONObject(i);
+                    entries.add(parseJSONtoBibEntry(item));
+                }
+                return entries;
+            } catch (JSONException e) {
+                throw new ParseException("Could not parse OpenAIRE response", e);
+            }
+        };
+    }
+
+    /// Converts one element of the `results` array into a [BibEntry].
+    ///
+    /// @param item JSON object describing a single research product
+    /// @return the entry built from the fields present in `item`
+    /// @throws ParseException if reading `item` raises a [JSONException]
+    private BibEntry parseJSONtoBibEntry(JSONObject item) throws ParseException {
+        try {
+            BibEntry entry = new BibEntry(parseType(item.optString("type", "")));
+
+            entry.setField(StandardField.TITLE, item.optString("mainTitle", ""));
+
+            // Description (abstract) - provided as an array
+            JSONArray descriptions = item.optJSONArray("description");
+            if (descriptions != null && !descriptions.isEmpty()) {
+                entry.setField(StandardField.ABSTRACT, descriptions.optString(0, ""));
+            }
+
+            // Publication date - extract the year portion
+            String publicationDate = item.optString("publicationDate", "");
+            if (publicationDate.length() >= 4) {
+                entry.setField(StandardField.YEAR, publicationDate.substring(0, 4));
+            }
+
+            // Authors
+            JSONArray authors = item.optJSONArray("authors");
+            if (authors != null && !authors.isEmpty()) {
+                List<String> authorNames = new ArrayList<>(authors.length());
+                for (int i = 0; i < authors.length(); i++) {
+                    JSONObject author = authors.optJSONObject(i);
+                    if (author != null) {
+                        String fullName = author.optString("fullName", "");
+                        if (!fullName.isBlank()) {
+                            authorNames.add(fullName);
+                        }
+                    }
+                }
+                if (!authorNames.isEmpty()) {
+                    entry.setField(StandardField.AUTHOR,
+                            AuthorList.parse(String.join(" and ", authorNames)).getAsLastFirstNamesWithAnd(false));
+                }
+            }
+
+            // PIDs - extract DOI if present
+            JSONArray pids = item.optJSONArray("pid");
+            if (pids != null) {
+                for (int i = 0; i < pids.length(); i++) {
+                    JSONObject pid = pids.optJSONObject(i);
+                    if (pid != null && "doi".equalsIgnoreCase(pid.optString("scheme", ""))) {
+                        entry.setField(StandardField.DOI, pid.optString("value", ""));
+                        break;
+                    }
+                }
+            }
+
+            entry.setField(StandardField.PUBLISHER, item.optString("publisher", ""));
+
+            // Journal info
+            JSONObject journal = item.optJSONObject("journal");
+            if (journal != null) {
+                entry.setField(StandardField.JOURNAL, journal.optString("name", ""));
+                entry.setField(StandardField.ISSN, journal.optString("issn", ""));
+                entry.setField(StandardField.VOLUME, journal.optString("volume", ""));
+                entry.setField(StandardField.NUMBER, journal.optString("issue", ""));
+                // OpenAIRE uses "sp"/"ep" for start/end page
+                String startPage = journal.optString("sp", "");
+                String endPage = journal.optString("ep", "");
+                if (!startPage.isBlank() && !endPage.isBlank()) {
+                    entry.setField(StandardField.PAGES, startPage + "--" + endPage);
+                } else if (!startPage.isBlank()) {
+                    entry.setField(StandardField.PAGES, startPage);
+                }
+            }
+
+            // Subjects as keywords
+            JSONArray subjects = item.optJSONArray("subjects");
+            if (subjects != null && !subjects.isEmpty()) {
+                List<String> keywordList = new ArrayList<>(subjects.length());
+                for (int i = 0; i < subjects.length(); i++) {
+                    JSONObject subjectWrapper = subjects.optJSONObject(i);
+                    if (subjectWrapper != null) {
+                        // Subject value may be nested under a "subject" key
+                        JSONObject nestedSubject = subjectWrapper.optJSONObject("subject");
+                        String value = nestedSubject != null
+                                ? nestedSubject.optString("value", "")
+                                : subjectWrapper.optString("value", "");
+                        if (!value.isBlank()) {
+                            keywordList.add(value);
+                        }
+                    }
+                }
+                if (!keywordList.isEmpty()) {
+                    entry.setField(StandardField.KEYWORDS, String.join(", ", keywordList));
+                }
+            }
+
+            // Link to the OpenAIRE explore page using the entry's OpenAIRE id
+            String id = item.optString("id", "");
+            if (!id.isBlank()) {
+                entry.setField(StandardField.URL, "https://explore.openaire.eu/search/publication?articleId=" + id);
+            }
+
+            return entry;
+        } catch (JSONException e) {
+            throw new ParseException("Could not parse OpenAIRE response", e);
+        }
+    }
+
+    /// Maps the `type` value of a result to an entry type.
+    ///
+    /// Matching ignores case; any value other than `dataset`, `software` or `other` maps to `Article`.
+    private EntryType parseType(String type) {
+        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
+        return switch (type.toLowerCase(Locale.ROOT)) {
+            case "dataset" -> StandardEntryType.Dataset;
+            case "software" -> StandardEntryType.Software;
+            case "other" -> StandardEntryType.Misc;
+            default -> StandardEntryType.Article;
+        };
+    }
+}
diff --git a/jablib/src/test/java/org/jabref/logic/importer/fetcher/OpenAireFetcherTest.java b/jablib/src/test/java/org/jabref/logic/importer/fetcher/OpenAireFetcherTest.java
new file mode 100644
index 00000000000..04c6b236633
--- /dev/null
+++ b/jablib/src/test/java/org/jabref/logic/importer/fetcher/OpenAireFetcherTest.java
@@ -0,0 +1,67 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.List;
+
+import org.jabref.logic.importer.FetcherException;
+import org.jabref.logic.search.query.SearchQueryVisitor;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.search.query.SearchQuery;
+import org.jabref.testutils.category.FetcherTest;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+@FetcherTest
+class OpenAireFetcherTest {
+
+    private OpenAireFetcher fetcher;
+
+    @BeforeEach
+    void setUp() {
+        fetcher = new OpenAireFetcher();
+    }
+
+    @Test
+    void getName() {
+        assertEquals("OpenAIRE", fetcher.getName());
+    }
+
+    @Test
+    void getURLForQueryBuildsSearchUrl() throws MalformedURLException, URISyntaxException {
+        String query = "OpenAIRE Graph";
+        SearchQuery searchQueryObject = new SearchQuery(query);
+        SearchQueryVisitor visitor = new SearchQueryVisitor(searchQueryObject.getSearchFlags());
+        URL url = fetcher.getURLForQuery(visitor.visitStart(searchQueryObject.getContext()), 0);
+        assertTrue(url.toString().startsWith("https://api.openaire.eu/graph/v2/researchProducts"));
+        assertTrue(url.toString().contains("search="));
+        assertTrue(url.toString().contains("pageSize=20"));
+        assertTrue(url.toString().contains("page=1")); // page number 0 is sent as page=1
+    }
+
+    @Test
+    void searchByQueryReturnsResults() throws FetcherException {
+        List<BibEntry> result = fetcher.performSearch("OpenAIRE Graph");
+        assertFalse(result.isEmpty());
+    }
+
+    @Test
+    void searchByEmptyQueryReturnsEmptyList() throws FetcherException {
+        List<BibEntry> result = fetcher.performSearch("");
+        assertEquals(List.of(), result);
+    }
+
+    @Test
+    void searchResultHasTitleField() throws FetcherException {
+        List<BibEntry> result = fetcher.performSearch("knowledge graphs"); // typed so the lambda below can call getField
+        assertFalse(result.isEmpty());
+        assertTrue(result.stream().anyMatch(entry -> entry.getField(StandardField.TITLE).isPresent()));
+    }
+}