Skip to content

Commit

Permalink
[CST-18016] Improved OpenAlex Person import mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
AdamF42 committed Feb 25, 2025
1 parent bc2dd1e commit e92a53a
Show file tree
Hide file tree
Showing 10 changed files with 4,089 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ public class OpenAlexPublicationLoader extends PublicationLoader {
public List<String> searchMetadataValues(Item researcher) {
List<String> names = getNames();

// First, check for "dc.identifier" and build the filter if present
// First, check for "dc.identifier.openalex" and build the filter if present
List<String> authorIds = names.stream()
.filter("dc.identifier"::equals)
.filter("dc.identifier.openalex"::equals)
.map(name -> itemService.getMetadata(researcher, name))
.filter(Objects::nonNull)
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.openalex.metadatamapping;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
import org.dspace.importer.external.metadatamapping.MetadatumDTO;
import org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor;

/**
 * Metadata contributor that reads a person's full name from a JSON document and emits only the part of
 * the name that matches the configured metadata field.
 * <p>
 * The value located by the configured query is split on whitespace: the last token is treated as the family
 * name and all preceding tokens (joined by a single space) as the first name. A value is contributed only when
 * the configured field element is {@code firstName} or {@code familyName} and the corresponding part is not
 * blank.
 *
 * @author Adamo Fapohunda (adamo.fapohunda at 4science.com)
 **/
public class OpenAlexAuthorNameContributor extends SimpleJsonPathMetadataContributor {

    private final static Logger log = LogManager.getLogger();

    /** Shared mapper: avoids allocating a new ObjectMapper for every contributed record. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Field element that selects the given-name part of the full name. */
    private static final String FIRST_NAME_ELEMENT = "firstName";

    /** Field element that selects the family-name part of the full name. */
    private static final String FAMILY_NAME_ELEMENT = "familyName";

    /** Expression handed to {@link JsonNode#at(String)} to locate the full name. */
    private String query;

    /** Target metadata field; its element decides which part of the name is emitted. */
    private MetadataFieldConfig field;

    /**
     * Sets the expression used to locate the full name inside the JSON document.
     *
     * @param query expression passed to {@link JsonNode#at(String)}
     */
    @Override
    public void setQuery(String query) {
        this.query = query;
    }

    /**
     * Sets the metadata field the extracted name part is written to.
     *
     * @param field target metadata field
     */
    @Override
    public void setField(MetadataFieldConfig field) {
        this.field = field;
    }

    /**
     * Extracts the configured part of the author name from the given JSON.
     *
     * @param fullJson JSON document as a string
     * @return a collection holding at most one metadatum; empty when the field is not configured, the JSON
     *         cannot be parsed, the queried node holds no usable text, or the requested name part is blank
     */
    @Override
    public Collection<MetadatumDTO> contributeMetadata(String fullJson) {
        Collection<MetadatumDTO> metadata = new ArrayList<>();

        if (field == null || field.getElement() == null) {
            return metadata;
        }

        JsonNode jsonNode = convertStringJsonToJsonNode(fullJson);
        if (jsonNode == null) {
            // Parsing failed (already logged) or there was no input: nothing to contribute.
            return metadata;
        }

        JsonNode node = jsonNode.at(query);
        if (node.isArray() || node.isNull() || StringUtils.isBlank(node.asText())) {
            return metadata;
        }

        String fullName = getStringValue(node).trim();
        if (StringUtils.isBlank(fullName)) {
            return metadata;
        }

        // Split on any run of whitespace so repeated spaces do not produce empty tokens.
        String[] nameParts = fullName.split("\\s+");

        String firstName = nameParts.length > 1 ?
            String.join(" ", Arrays.copyOfRange(nameParts, 0, nameParts.length - 1)) : "";
        String lastName = nameParts[nameParts.length - 1];

        // Check field configuration and map accordingly
        if (FIRST_NAME_ELEMENT.equals(field.getElement()) && StringUtils.isNotBlank(firstName)) {
            metadata.add(createMetadatum(field, firstName));
        } else if (FAMILY_NAME_ELEMENT.equals(field.getElement()) && StringUtils.isNotBlank(lastName)) {
            metadata.add(createMetadatum(field, lastName));
        }

        return metadata;
    }

    /**
     * Builds a metadatum for the given field carrying the given value.
     *
     * @param field metadata field supplying schema, element and qualifier
     * @param value value to store
     * @return the populated metadatum
     */
    private MetadatumDTO createMetadatum(MetadataFieldConfig field, String value) {
        MetadatumDTO metadatum = new MetadatumDTO();
        metadatum.setValue(value);
        metadatum.setElement(field.getElement());
        metadatum.setQualifier(field.getQualifier());
        metadatum.setSchema(field.getSchema());
        return metadatum;
    }

    /**
     * Converts a textual or numeric node to its string form.
     *
     * @param node node to convert
     * @return the node value as a string, or an empty string (after logging an error) for any other node type
     */
    private String getStringValue(JsonNode node) {
        if (node.isTextual()) {
            return node.textValue();
        }
        if (node.isNumber()) {
            return node.numberValue().toString();
        }
        log.error("It wasn't possible to convert the value of the following JsonNode:{}", node.asText());
        return StringUtils.EMPTY;
    }

    /**
     * Parses the given JSON string.
     *
     * @param json JSON document as a string
     * @return the parsed tree, or {@code null} when the input is blank or cannot be parsed
     */
    private JsonNode convertStringJsonToJsonNode(String json) {
        if (StringUtils.isBlank(json)) {
            return null;
        }
        try {
            return MAPPER.readTree(json);
        } catch (JsonProcessingException e) {
            log.error("Unable to process json response.", e);
            return null;
        }
    }

}



Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
<bean id="openalexPersonLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<property name="metadataSource" ref="openalexImportPersonService"/>
<property name="sourceIdentifier" value="openalexPerson"/>
<property name="recordIdMetadata" value="dc.identifier"/>
<property name="recordIdMetadata" value="dc.identifier.openalex"/>
<property name="supportedEntityTypes">
<list>
<value>Person</value>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.rest;

import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.anyMap;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;
import org.dspace.app.rest.matcher.ExternalSourceEntryMatcher;
import org.dspace.app.rest.matcher.ExternalSourceMatcher;
import org.dspace.app.rest.test.AbstractControllerIntegrationTest;
import org.dspace.importer.external.liveimportclient.service.LiveImportClient;
import org.dspace.importer.external.openalex.service.OpenAlexImportMetadataSourceServiceImpl;
import org.hamcrest.Matchers;
import org.junit.Before;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.test.context.bean.override.mockito.MockitoBean;
import org.springframework.test.util.ReflectionTestUtils;

public class OpenAlexPersonExternalSourcesIT extends AbstractControllerIntegrationTest {


@MockitoBean
private LiveImportClient liveImportClient;

@Autowired
@Qualifier("openalexImportPersonService")
private OpenAlexImportMetadataSourceServiceImpl openAlexImportMetadataSourceService;


@Before
public void setUp() {
ReflectionTestUtils.setField(openAlexImportMetadataSourceService, "liveImportClient", liveImportClient);
}

@Test
public void findOneOpenalexImportPersonServiceExternalSourceTest() throws Exception {
getClient().perform(get("/api/integration/externalsources?size=25")).andExpect(status().isOk())
.andExpect(jsonPath("$._embedded.externalsources", Matchers.hasItem(
ExternalSourceMatcher.matchExternalSource("openalexPerson",
"openalexPerson", false))));
}

@Test
public void findOpenalexPersonExternalSourceEntriesEmptyWithQueryTest() throws Exception {

try (InputStream file = getClass().getResourceAsStream("openalex-person-empty.json")) {
String jsonResponse = IOUtils.toString(file, Charset.defaultCharset());
when(liveImportClient.executeHttpGetRequest(anyInt(), anyString(), anyMap()))
.thenReturn(jsonResponse);


getClient().perform(get("/api/integration/externalsources/openalexPerson/entries")
.param("query", "empty"))
.andExpect(status().isOk()).andExpect(jsonPath("$.page.number", is(0)));
verify(liveImportClient, times(2)).executeHttpGetRequest(anyInt(), anyString(), anyMap());
}
}

@Test
public void findOpenalexPersonExternalSourceEntriesTest() throws Exception {
try (InputStream file = getClass().getResourceAsStream("openalex-person-single.json")) {
String jsonResponse = IOUtils.toString(file, Charset.defaultCharset());
when(liveImportClient.executeHttpGetRequest(anyInt(), anyString(), anyMap()))
.thenReturn(jsonResponse);

getClient().perform(get("/api/integration/externalsources/openalexPerson/entries")
.param("query", "Claudio Cortese"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.page.number", is(0)))
.andExpect(jsonPath("$.page.totalElements", is(1)))
.andExpect(jsonPath("$.page.totalPages", is(1)))
.andExpect(jsonPath("$.page.size", is(20)))

.andExpect(jsonPath("$._embedded.externalSourceEntries[0].id", is("A5016721535")))
.andExpect(
jsonPath("$._embedded.externalSourceEntries[0].display", is("Claudio Giovanni Cortese")))
.andExpect(
jsonPath("$._embedded.externalSourceEntries[0].value", is("Claudio Giovanni Cortese")))
.andExpect(jsonPath("$._embedded.externalSourceEntries[0].externalSource", is("openalexPerson")))

// Verify metadata fields
.andExpect(
jsonPath("$._embedded.externalSourceEntries[0].metadata['dc.identifier.openalex'][0].value",
is("A5016721535")))
.andExpect(jsonPath("$._embedded.externalSourceEntries[0].metadata['person.firstName'][0].value",
is("Claudio Giovanni")))
.andExpect(
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.familyName'][0].value",
is("Cortese")))
.andExpect(
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.identifier.orcid'][0].value",
is("https://orcid.org/0000-0002-9429-5000")))

// Verify affiliations
.andExpect(jsonPath("$._embedded.externalSourceEntries[0].metadata['person.affiliation.name']",
hasSize(10)))
.andExpect(
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.affiliation.name'][0].value",
is("University of Rome Tor Vergata")))
.andExpect(
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.affiliation.name'][1].value",
is("Ospedali Riuniti di Ancona")))
.andExpect(
jsonPath("$._embedded.externalSourceEntries[0].metadata['person.affiliation.name'][2].value",
is("University of Naples Federico II")));

verify(liveImportClient, times(2)).executeHttpGetRequest(anyInt(), anyString(), anyMap());
}
}


@Test
public void findAllOpenalexPersonExternalSourceEntriesWithQueryTest() throws Exception {

try (InputStream file = getClass().getResourceAsStream("openalex-person-multiple.json")) {
String jsonResponse = IOUtils.toString(file, Charset.defaultCharset());

when(liveImportClient.executeHttpGetRequest(anyInt(), anyString(), anyMap()))
.thenReturn(jsonResponse);

getClient().perform(get("/api/integration/externalsources/openalexPerson/entries")
.param("query", "covid"))
.andExpect(status().isOk())
.andExpect(jsonPath("$._embedded.externalSourceEntries", hasSize(2)))
.andExpect(jsonPath("$._embedded.externalSourceEntries",
Matchers.containsInAnyOrder(
ExternalSourceEntryMatcher.matchExternalSourceEntry(
"A5016721535",
"Claudio Cortese",
"Claudio Cortese",
"openalexPerson"
),
ExternalSourceEntryMatcher.matchExternalSourceEntry(
"A5008845767",
"Claudio Giovanni Cortese",
"Claudio Giovanni Cortese",
"openalexPerson"
)
)
));

verify(liveImportClient, times(2)).executeHttpGetRequest(anyInt(), anyString(), anyMap());
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,6 @@ public void findOneOpenalexImportPublicationByAuthorIdExternalSourceTest() throw
"openalexPublicationByAuthorId", false))));
}

@Test
public void findOneOpenalexImportPersonServiceExternalSourceTest() throws Exception {
    // List the external sources (page size 25) and expect the "openalexPerson" entry among them.
    getClient()
        .perform(get("/api/integration/externalsources?size=25"))
        .andExpect(status().isOk())
        .andExpect(
            jsonPath(
                "$._embedded.externalsources",
                Matchers.hasItem(
                    ExternalSourceMatcher.matchExternalSource("openalexPerson", "openalexPerson", false))));
}


@Test
public void findOpenalexPublicationExternalSourceEntriesEmptyWithQueryTest() throws Exception {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"meta": {
"count": 0,
"db_response_time_ms": 65,
"page": 1,
"per_page": 25,
"groups_count": null
},
"results": [],
"group_by": []
}
Loading

0 comments on commit e92a53a

Please sign in to comment.