Skip to content

Commit

Permalink
Merge pull request #142 from karilint/diet_set_fix
Browse files Browse the repository at this point in the history
I'm merging this PR into main already. Please reopen if there are more fixes to come.
  • Loading branch information
Viljami Ilola authored Apr 29, 2024
2 parents 55af530 + a721275 commit 9ff7c01
Show file tree
Hide file tree
Showing 9 changed files with 185 additions and 101 deletions.
106 changes: 58 additions & 48 deletions app/imports/importers/base_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@
SourceMethod,
ChoiceValue,
SourceLocation)
import re
from requests_cache import CachedSession
from datetime import timedelta
from config.settings import ITIS_CACHE


Expand Down Expand Up @@ -54,7 +51,8 @@ def get_author(self, social_id: str):
return author[0]
raise Exception("Author not found")

def get_master_reference_from_cross_ref(self, citation: str, user_author: User):
def get_master_reference_from_cross_ref(
self, citation: str, user_author: User):
"""
Gets the master reference from crossref API
https://api.crossref.org/swagger-ui/index.htm
Expand Down Expand Up @@ -111,7 +109,7 @@ def get_or_create_master_reference(self, citation: str, author: User):
Return MasterReference object for the given source_reference
"""
master_reference = MasterReference.objects.filter(citation=citation)
if master_reference.count() == 1:
if master_reference.count() > 0:
return master_reference[0]
new_master_reference = self.get_master_reference_from_cross_ref(
citation, author)
Expand All @@ -131,7 +129,7 @@ def get_or_create_source_reference(self, citation: str, author: User):
source_reference = SourceReference.objects.filter(
citation__iexact=citation)

if source_reference.count() == 1:
if source_reference.count() > 0:
return source_reference[0]

new_reference = SourceReference(
Expand All @@ -148,7 +146,7 @@ def get_or_create_entity_class(self, taxon_rank: str, author: User):
Return EntityClass object for the given taxon_rank or create a new one
"""
entity_class = EntityClass.objects.filter(name__iexact=taxon_rank)
if entity_class.count() == 1:
if entity_class.count() > 0:
return entity_class[0]
new_entity_class = EntityClass(name=taxon_rank, created_by=author)
new_entity_class.save()
Expand All @@ -161,7 +159,7 @@ def get_or_create_source_entity(self, name: str, source_reference: SourceReferen
"""
source_entity = SourceEntity.objects.filter(
name__iexact=name, reference=source_reference)
if source_entity.count() == 1:
if source_entity.count() > 0:
return source_entity[0]
new_source_entity = SourceEntity(
name=name, reference=source_reference, created_by=author, entity=entity_class)
Expand All @@ -178,7 +176,7 @@ def create_entity_relation(self, source_entity):
data_status_id=5).filter(
master_entity__reference_id=4).filter(
relation__name__iexact='Taxon Match')
if found_entity_relation.count() == 1:
if found_entity_relation.count() > 0:
EntityRelation(master_entity=found_entity_relation[0].master_entity,
source_entity=source_entity, relation=found_entity_relation[0].relation,
data_status=found_entity_relation[0].data_status,
Expand All @@ -193,27 +191,32 @@ def create_and_link_entity_relation_from_api(self, source_entity):
"""
name = self.search_scientificName(source_entity.name)
if name:
master_entity_result = MasterEntity.objects.filter(name=name, entity_id=source_entity.entity_id,reference_id=4)
master_entity_result = MasterEntity.objects.filter(
name=name, entity_id=source_entity.entity_id, reference_id=4)
if master_entity_result:
return EntityRelation(master_entity=master_entity_result[0],
source_entity=source_entity,
relation_id=1,
data_status_id=5,
relation_status_id=1,
remarks=master_entity_result[0].reference).save()
return EntityRelation(master_entity=master_entity_result[0],
source_entity=source_entity,
relation_id=1,
data_status_id=5,
relation_status_id=1,
remarks=master_entity_result[0].reference).save()
else:
return None

def get_or_create_source_location(self, location: str, source_reference: SourceReference, author: User):
return None

def get_or_create_source_location(
self, location: str, source_reference: SourceReference, author: User):
"""
Return SourceLocation object for the given location or create a new one
"""
if location != location or location == 'nan' or location == "":
return None

try:
source_location = SourceLocation.objects.filter(
name__iexact=location, reference=source_reference)
except Exception as error:
raise Exception(str(error)) from error
if source_location.count() == 1:
if source_location.count() > 0:
return source_location[0]
new_source_location = SourceLocation(
name=location, reference=source_reference, created_by=author)
Expand All @@ -225,10 +228,13 @@ def get_or_create_time_period(self, time_period: str, source_reference: SourceRe
"""
Return TimePeriod object for the given time_period or create a new one
"""
time_period = TimePeriod.objects.filter(
if time_period != time_period or time_period == 'nan' or time_period == "":
return None

time_period_filtered = TimePeriod.objects.filter(
name__iexact=time_period, reference=source_reference)
if time_period.count() == 1:
return time_period[0]
if time_period_filtered.count() > 0:
return time_period_filtered[0]

new_time_period = TimePeriod(
name=time_period, reference=source_reference, created_by=author)
Expand All @@ -240,9 +246,12 @@ def get_or_create_source_method(self, method: str, source_reference: SourceRefer
"""
Return SourceMethod object for the given method or create a new one
"""
if method != method or method == 'nan' or method == "":
return None

source_method = SourceMethod.objects.filter(
name__iexact=method, reference=source_reference)
if source_method.count() == 1:
if source_method.count() > 0:
return source_method[0]

new_source_method = SourceMethod(
Expand All @@ -268,33 +277,34 @@ def possible_nan_to_zero(self, size):
return size

def possible_nan_to_none(self, possible):
    """
    Return None for NaN-like input, otherwise return the value unchanged.

    A value counts as NaN-like when it is not equal to itself (the
    floating point NaN property) or when it is the literal string 'nan'.
    """
    # NaN is the only ordinary value for which `x != x` holds.
    is_nan = possible != possible
    if is_nan or possible == 'nan':
        return None
    return possible

def search_scientificName(self, entity_name):
    """
    Look up entity_name in the ITIS web service.

    The name is first normalised with self.clean_query. The response is
    fetched through a CachedSession (30 day expiry, stale entries reused
    on error). The scientific name of the first returned term is returned
    only when it equals the cleaned query, compared case-insensitively.

    Returns None when the cleaned query is empty, when the request or
    JSON decoding fails with one of the caught exceptions, or when no
    exactly matching term is found.
    """
    query = self.clean_query(entity_name)
    if not query:
        # Nothing left to search for; an empty query can never match.
        return None

    url = 'http://www.itis.gov/ITISWebService/jsonservice/getITISTermsFromScientificName?srchKey='
    try:
        session = CachedSession(
            ITIS_CACHE, expire_after=timedelta(days=30), stale_if_error=True)
        response = session.get(url + query)
        data = response.json()
    # NOTE(review): if ConnectionError here is the builtin rather than
    # requests.exceptions.ConnectionError, connection failures raised by
    # requests are not caught - confirm against the file's imports.
    except (ConnectionError, UnicodeError, json.JSONDecodeError):
        return None

    itis_terms = data.get('itisTerms') if isinstance(data, dict) else None
    if not itis_terms:
        return None

    # Only the first term is considered; it may be null when nothing matched.
    taxon_data = itis_terms[0]
    if not taxon_data:
        return None

    scientific_name = taxon_data.get('scientificName')
    if scientific_name and scientific_name.lower() == query.lower():
        return scientific_name
    return None

def clean_query(self, name):
    """
    Normalise a taxon name into a plain search string.

    Removes, as whole words only, the rank/qualifier abbreviations listed
    in the pattern (with an optional trailing dot), any word of one or two
    characters, and every non-word character. The remaining words are
    joined with single spaces.

    Returns an empty string when name is not a string (e.g. a NaN cell)
    or when nothing remains after cleaning.
    """
    if not isinstance(name, str):
        return ''
    # The trailing \b keeps abbreviations from eating the start of real
    # epithets such as "gentilis" or "variabilis".
    cleaned_name = re.sub(
        r'\b(?:aff|gen|bot|zoo|ssp|subf|exx|indet|subsp|subvar|var|nothovar|group|forma)\b\.?|\b\w{1,2}\b|\s*\W',
        ' ',
        name)
    # Every removal leaves a space behind; collapse the runs so the result
    # can be compared directly with a scientific name.
    return ' '.join(cleaned_name.split())
Loading

0 comments on commit 9ff7c01

Please sign in to comment.