forked from DSpace/DSpace
-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CST-18016] Improved OpenAlex Person import mapping
- Loading branch information
Showing
10 changed files
with
4,089 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
.../org/dspace/importer/external/openalex/metadatamapping/OpenAlexAuthorNameContributor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
/** | ||
* The contents of this file are subject to the license and copyright | ||
* detailed in the LICENSE and NOTICE files at the root of the source | ||
* tree and available online at | ||
* | ||
* http://www.dspace.org/license/ | ||
*/ | ||
package org.dspace.importer.external.openalex.metadatamapping; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collection; | ||
|
||
import com.fasterxml.jackson.core.JsonProcessingException; | ||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.dspace.importer.external.metadatamapping.MetadataFieldConfig; | ||
import org.dspace.importer.external.metadatamapping.MetadatumDTO; | ||
import org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor; | ||
|
||
/** | ||
* @author Adamo Fapohunda (adamo.fapohunda at 4science.com) | ||
**/ | ||
public class OpenAlexAuthorNameContributor extends SimpleJsonPathMetadataContributor { | ||
|
||
private final static Logger log = LogManager.getLogger(); | ||
|
||
private String query; | ||
private MetadataFieldConfig field; | ||
|
||
@Override | ||
public void setQuery(String query) { | ||
this.query = query; | ||
} | ||
|
||
@Override | ||
public void setField(MetadataFieldConfig field) { | ||
this.field = field; | ||
} | ||
|
||
@Override | ||
public Collection<MetadatumDTO> contributeMetadata(String fullJson) { | ||
Collection<MetadatumDTO> metadata = new ArrayList<>(); | ||
|
||
if (field == null || field.getElement() == null) { | ||
return metadata; | ||
} | ||
|
||
JsonNode jsonNode = convertStringJsonToJsonNode(fullJson); | ||
JsonNode node = jsonNode.at(query); | ||
|
||
if (node.isArray() || node.isNull() || StringUtils.isBlank(node.asText())) { | ||
return metadata; | ||
} | ||
|
||
String fullName = getStringValue(node).trim(); | ||
String[] nameParts = fullName.split(" "); | ||
|
||
if (nameParts.length < 1) { | ||
return metadata; | ||
} | ||
|
||
String firstName = nameParts.length > 1 ? | ||
String.join(" ", Arrays.copyOfRange(nameParts, 0, nameParts.length - 1)) : ""; | ||
String lastName = nameParts[nameParts.length - 1]; | ||
|
||
// Check field configuration and map accordingly | ||
if ("firstName".equals(field.getElement()) && StringUtils.isNotBlank(firstName)) { | ||
metadata.add(createMetadatum(field, firstName)); | ||
} else if ("familyName".equals(field.getElement()) && StringUtils.isNotBlank(lastName)) { | ||
metadata.add(createMetadatum(field, lastName)); | ||
} | ||
|
||
return metadata; | ||
} | ||
|
||
private MetadatumDTO createMetadatum(MetadataFieldConfig field, String value) { | ||
MetadatumDTO metadatum = new MetadatumDTO(); | ||
metadatum.setValue(value); | ||
metadatum.setElement(field.getElement()); | ||
metadatum.setQualifier(field.getQualifier()); | ||
metadatum.setSchema(field.getSchema()); | ||
return metadatum; | ||
} | ||
|
||
private String getStringValue(JsonNode node) { | ||
if (node.isTextual()) { | ||
return node.textValue(); | ||
} | ||
if (node.isNumber()) { | ||
return node.numberValue().toString(); | ||
} | ||
log.error("It wasn't possible to convert the value of the following JsonNode:" + node.asText()); | ||
return StringUtils.EMPTY; | ||
} | ||
|
||
private JsonNode convertStringJsonToJsonNode(String json) { | ||
ObjectMapper mapper = new ObjectMapper(); | ||
JsonNode body = null; | ||
try { | ||
body = mapper.readTree(json); | ||
} catch (JsonProcessingException e) { | ||
log.error("Unable to process json response.", e); | ||
} | ||
return body; | ||
} | ||
|
||
|
||
} | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
166 changes: 166 additions & 0 deletions
166
dspace-server-webapp/src/test/java/org/dspace/app/rest/OpenAlexPersonExternalSourcesIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
/** | ||
* The contents of this file are subject to the license and copyright | ||
* detailed in the LICENSE and NOTICE files at the root of the source | ||
* tree and available online at | ||
* | ||
* http://www.dspace.org/license/ | ||
*/ | ||
package org.dspace.app.rest; | ||
|
||
import static org.hamcrest.Matchers.hasSize; | ||
import static org.hamcrest.Matchers.is; | ||
import static org.mockito.ArgumentMatchers.anyInt; | ||
import static org.mockito.ArgumentMatchers.anyMap; | ||
import static org.mockito.ArgumentMatchers.anyString; | ||
import static org.mockito.Mockito.times; | ||
import static org.mockito.Mockito.verify; | ||
import static org.mockito.Mockito.when; | ||
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; | ||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; | ||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; | ||
|
||
import java.io.InputStream; | ||
import java.nio.charset.Charset; | ||
|
||
import org.apache.commons.io.IOUtils; | ||
import org.dspace.app.rest.matcher.ExternalSourceEntryMatcher; | ||
import org.dspace.app.rest.matcher.ExternalSourceMatcher; | ||
import org.dspace.app.rest.test.AbstractControllerIntegrationTest; | ||
import org.dspace.importer.external.liveimportclient.service.LiveImportClient; | ||
import org.dspace.importer.external.openalex.service.OpenAlexImportMetadataSourceServiceImpl; | ||
import org.hamcrest.Matchers; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
import org.springframework.beans.factory.annotation.Autowired; | ||
import org.springframework.beans.factory.annotation.Qualifier; | ||
import org.springframework.test.context.bean.override.mockito.MockitoBean; | ||
import org.springframework.test.util.ReflectionTestUtils; | ||
|
||
public class OpenAlexPersonExternalSourcesIT extends AbstractControllerIntegrationTest { | ||
|
||
|
||
@MockitoBean | ||
private LiveImportClient liveImportClient; | ||
|
||
@Autowired | ||
@Qualifier("openalexImportPersonService") | ||
private OpenAlexImportMetadataSourceServiceImpl openAlexImportMetadataSourceService; | ||
|
||
|
||
@Before | ||
public void setUp() { | ||
ReflectionTestUtils.setField(openAlexImportMetadataSourceService, "liveImportClient", liveImportClient); | ||
} | ||
|
||
@Test | ||
public void findOneOpenalexImportPersonServiceExternalSourceTest() throws Exception { | ||
getClient().perform(get("/api/integration/externalsources?size=25")).andExpect(status().isOk()) | ||
.andExpect(jsonPath("$._embedded.externalsources", Matchers.hasItem( | ||
ExternalSourceMatcher.matchExternalSource("openalexPerson", | ||
"openalexPerson", false)))); | ||
} | ||
|
||
@Test | ||
public void findOpenalexPersonExternalSourceEntriesEmptyWithQueryTest() throws Exception { | ||
|
||
try (InputStream file = getClass().getResourceAsStream("openalex-person-empty.json")) { | ||
String jsonResponse = IOUtils.toString(file, Charset.defaultCharset()); | ||
when(liveImportClient.executeHttpGetRequest(anyInt(), anyString(), anyMap())) | ||
.thenReturn(jsonResponse); | ||
|
||
|
||
getClient().perform(get("/api/integration/externalsources/openalexPerson/entries") | ||
.param("query", "empty")) | ||
.andExpect(status().isOk()).andExpect(jsonPath("$.page.number", is(0))); | ||
verify(liveImportClient, times(2)).executeHttpGetRequest(anyInt(), anyString(), anyMap()); | ||
} | ||
} | ||
|
||
@Test | ||
public void findOpenalexPersonExternalSourceEntriesTest() throws Exception { | ||
try (InputStream file = getClass().getResourceAsStream("openalex-person-single.json")) { | ||
String jsonResponse = IOUtils.toString(file, Charset.defaultCharset()); | ||
when(liveImportClient.executeHttpGetRequest(anyInt(), anyString(), anyMap())) | ||
.thenReturn(jsonResponse); | ||
|
||
getClient().perform(get("/api/integration/externalsources/openalexPerson/entries") | ||
.param("query", "Claudio Cortese")) | ||
.andExpect(status().isOk()) | ||
.andExpect(jsonPath("$.page.number", is(0))) | ||
.andExpect(jsonPath("$.page.totalElements", is(1))) | ||
.andExpect(jsonPath("$.page.totalPages", is(1))) | ||
.andExpect(jsonPath("$.page.size", is(20))) | ||
|
||
.andExpect(jsonPath("$._embedded.externalSourceEntries[0].id", is("A5016721535"))) | ||
.andExpect( | ||
jsonPath("$._embedded.externalSourceEntries[0].display", is("Claudio Giovanni Cortese"))) | ||
.andExpect( | ||
jsonPath("$._embedded.externalSourceEntries[0].value", is("Claudio Giovanni Cortese"))) | ||
.andExpect(jsonPath("$._embedded.externalSourceEntries[0].externalSource", is("openalexPerson"))) | ||
|
||
// Verify metadata fields | ||
.andExpect( | ||
jsonPath("$._embedded.externalSourceEntries[0].metadata['dc.identifier.openalex'][0].value", | ||
is("A5016721535"))) | ||
.andExpect(jsonPath("$._embedded.externalSourceEntries[0].metadata['person.firstName'][0].value", | ||
is("Claudio Giovanni"))) | ||
.andExpect( | ||
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.familyName'][0].value", | ||
is("Cortese"))) | ||
.andExpect( | ||
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.identifier.orcid'][0].value", | ||
is("https://orcid.org/0000-0002-9429-5000"))) | ||
|
||
// Verify affiliations | ||
.andExpect(jsonPath("$._embedded.externalSourceEntries[0].metadata['person.affiliation.name']", | ||
hasSize(10))) | ||
.andExpect( | ||
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.affiliation.name'][0].value", | ||
is("University of Rome Tor Vergata"))) | ||
.andExpect( | ||
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.affiliation.name'][1].value", | ||
is("Ospedali Riuniti di Ancona"))) | ||
.andExpect( | ||
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.affiliation.name'][2].value", | ||
is("University of Naples Federico II"))); | ||
|
||
verify(liveImportClient, times(2)).executeHttpGetRequest(anyInt(), anyString(), anyMap()); | ||
} | ||
} | ||
|
||
|
||
@Test | ||
public void findAllOpenalexPersonExternalSourceEntriesWithQueryTest() throws Exception { | ||
|
||
try (InputStream file = getClass().getResourceAsStream("openalex-person-multiple.json")) { | ||
String jsonResponse = IOUtils.toString(file, Charset.defaultCharset()); | ||
|
||
when(liveImportClient.executeHttpGetRequest(anyInt(), anyString(), anyMap())) | ||
.thenReturn(jsonResponse); | ||
|
||
getClient().perform(get("/api/integration/externalsources/openalexPerson/entries") | ||
.param("query", "covid")) | ||
.andExpect(status().isOk()) | ||
.andExpect(jsonPath("$._embedded.externalSourceEntries", hasSize(2))) | ||
.andExpect(jsonPath("$._embedded.externalSourceEntries", | ||
Matchers.containsInAnyOrder( | ||
ExternalSourceEntryMatcher.matchExternalSourceEntry( | ||
"A5016721535", | ||
"Claudio Cortese", | ||
"Claudio Cortese", | ||
"openalexPerson" | ||
), | ||
ExternalSourceEntryMatcher.matchExternalSourceEntry( | ||
"A5008845767", | ||
"Claudio Giovanni Cortese", | ||
"Claudio Giovanni Cortese", | ||
"openalexPerson" | ||
) | ||
) | ||
)); | ||
|
||
verify(liveImportClient, times(2)).executeHttpGetRequest(anyInt(), anyString(), anyMap()); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 11 additions & 0 deletions
11
dspace-server-webapp/src/test/resources/org/dspace/app/rest/openalex-person-empty.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"meta": { | ||
"count": 0, | ||
"db_response_time_ms": 65, | ||
"page": 1, | ||
"per_page": 25, | ||
"groups_count": null | ||
}, | ||
"results": [], | ||
"group_by": [] | ||
} |
Oops, something went wrong.