From b0eb7f4a141d1a70746f74d2cf29bc270dacec3d Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 12:46:15 +0300 Subject: [PATCH 01/15] Fix import validation Co-authored-by: Niilo Kurki --- app/imports/validation_lib/base_validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/imports/validation_lib/base_validation.py b/app/imports/validation_lib/base_validation.py index a40f0f8..443e936 100644 --- a/app/imports/validation_lib/base_validation.py +++ b/app/imports/validation_lib/base_validation.py @@ -271,10 +271,11 @@ def validate_in_fields(self, data, field_name, rule): """Used for validating fields for some number of values to allow, returns a list of error messages""" #retrieve the value for that in rule ls = rule.split(':')[1].split(',') + ls = [x.lower() for x in ls] errs = [] try: - if str(data[field_name]) not in ls: + if str(data[field_name]).lower() not in ls: errs.append(self.return_field_message(field_name, "in")) except KeyError: errs.append(self.return_field_message(field_name,'in')) From 921e956a9ba8d340672e22792f190ac1b086f9ae Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 12:47:04 +0300 Subject: [PATCH 02/15] Fix diet import validation --- app/imports/importers/diet_importer.py | 2 +- .../validation_lib/diet_set_validation.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py index d1dd93e..4cfa51d 100644 --- a/app/imports/importers/diet_importer.py +++ b/app/imports/importers/diet_importer.py @@ -51,7 +51,7 @@ def importRow(self, row): part_of_organism = None else: part_of_organism = ChoiceValue.objects.filter( - choice_set="FoodItemPart", caption=part_of_organism).first() + choice_set="FoodItemPart", caption=part_of_organism.upper()).first() if not part_of_organism: part_of_organism = None diff --git a/app/imports/validation_lib/diet_set_validation.py b/app/imports/validation_lib/diet_set_validation.py index 4561e52..cb57ab3 100644 --- a/app/imports/validation_lib/diet_set_validation.py +++ b/app/imports/validation_lib/diet_set_validation.py @@ -12,18 +12,18 @@ def __init__(self): self.rules = { "author": "required|author", "verbatimScientificName": "required|alpha|max:500", - "taxonRank": "in:Subspecies,Varietas,Forma,Species,Genus,Nothogenus,Nothospecies,Nothosubspecies,Family,nan,species,subspecies,varietas,forma,family,genus,nothogenus,nothospecies,nothosubspecies,SPECIES,SUBSPECIES,VARIETAS,FORMA,FAMILY,GENUS,NOTHOSPECIES,NOTHOSUBSPECIES,NOTHOSPECIES,NOTHOSUBSPECIES", + "taxonRank": "in:Subspecies,Varietas,Forma,Species,Genus,Nothogenus,Nothospecies,Nothosubspecies,Family,nan", "verbatimLocality": "max:250", - "habitat": "max:250", - "samplingEffort": "max:250", + "habitat": "max:250", + "samplingEffort": "max:250", "sex": "choiceValue:gender", - "individualCount": "digits", + "individualCount": "digits", "verbatimEventDate": "max:250", - "verbatimAssociatedTaxa": "max:250", - # "PartOfOrganism" : "in:BARK,BLOOD,BONES,BUD CARRION,EGGS,EXUDATES,FECES,FLOWER,FRUIT,LARVAE,LEAF,MINERAL,NECTAR/JUICE,NONE,POLLEN,ROOT,SEED,SHOOT,STEM,UNKNOWN,WHOLE", - "sequence": "max:250", - "measurementValue": "digis|min:0", - "associatedReferences": "max:250", + "verbatimAssociatedTaxa": "max:250", + "PartOfOrganism" : "in:BARK,BLOOD,BONES,BUD CARRION,EGGS,EXUDATES,FECES,FLOWER,FRUIT,LARVAE,LEAF,MINERAL,NECTAR/JUICE,NONE,POLLEN,ROOT,SEED,SHOOT,STEM,UNKNOWN,WHOLE", + "sequence": "max:250", + "measurementValue": "digis|min:0", + "associatedReferences": "max:250", "references": "required|min:10|max:500|regex:.*([1-2][0-9]{3})", } From 0df8de74e894536cef339010f833cd04d147ffe5 Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 13:57:32 +0300 Subject: [PATCH 03/15] Fix validation message Co-authored-by: Niilo Kurki --- app/imports/validation_lib/base_validation.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/imports/validation_lib/base_validation.py b/app/imports/validation_lib/base_validation.py index 443e936..b76b853 100644 --- a/app/imports/validation_lib/base_validation.py +++ b/app/imports/validation_lib/base_validation.py @@ -222,6 +222,10 @@ def validate_boolean_fields(self, data, field_name): def validate_digit_fields(self, data, field_name): """Used for validating integer fields, returns a list of error messages""" errs = [] + #if , in value return error + if ',' in str(data[field_name]): + errs.append(self.return_field_message("digits", "not_decimal").format(value=data[field_name])) + return errs try: if not isinstance(float(data[field_name]),(int, float)) or data[field_name] == "nan" or data[field_name] == "": @@ -478,7 +482,7 @@ def get_error_message_templates(self): "boolean": "'%s' has invalid value for boolean field", "required": "'%s' must be filled", "alpha": "'%s' can have only alphabets", - "digits": "'%s' must be an integer", + "digits": "'%s' must be an number", "author": "'%s' field must follow the following format: 0000-0000-0000-0000", "max": "The maximum value for the field '%s' is invalid", "min": "The minimum value for the field '%s' is invalid", @@ -518,4 +522,5 @@ def get_custom_error_messages(self): "active.no_field":"You did not provide any field named active in your data dictionary", "age.no_field":"You did not provide any field named age in your data dictionary", "choiceValue.invalid_value":"'{value}' is invalid value for {field} field", + "digits.not_decimal":"'{value}' is not a decimal number. Use . for decimal separator", } \ No newline at end of file From 5ce01792adf13a0db24bad95fe76905c3ebb98dc Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 14:00:36 +0300 Subject: [PATCH 04/15] Fix import message and validation and formatting --- app/imports/importers/diet_importer.py | 116 ++++++++++++++---- .../validation_lib/diet_set_validation.py | 2 +- app/imports/views_wrapper.py | 5 +- 3 files changed, 94 insertions(+), 29 deletions(-) diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py index 4cfa51d..2a4c67a 100644 --- a/app/imports/importers/diet_importer.py +++ b/app/imports/importers/diet_importer.py @@ -13,7 +13,6 @@ from .base_importer import BaseImporter - class DietImporter(BaseImporter): """Class for diet importer """ @@ -27,7 +26,12 @@ def importRow(self, row): entityclass = self.get_or_create_entity_class( getattr(row, 'taxonRank'), author) taxon = self.get_or_create_source_entity( - getattr(row, 'verbatimScientificName'), reference, entityclass, author) + getattr( + row, + 'verbatimScientificName'), + reference, + entityclass, + author) method = self.get_or_create_source_method( getattr(row, 'measurementMethod'), reference, author) time_period = self.get_or_create_time_period( @@ -44,7 +48,7 @@ def importRow(self, row): if gender == "nan" or gender == "": gender = None else: - gender, created = ChoiceValue.objects.get_or_create( + gender = ChoiceValue.objects.get_or_create( choice_set="Gender", caption=gender.capitalize()) if part_of_organism == "nan" or part_of_organism == "": @@ -55,11 +59,40 @@ def importRow(self, row): if not part_of_organism: part_of_organism = None - obj = DietSet.objects.filter(reference=reference, cited_reference=getattr(row, 'associatedReferences'), taxon=taxon, location=new_source_location, sample_size=self.possible_nan_to_zero(getattr(row, 'individualCount')), - time_period=time_period, method=method, study_time=getattr(row, 'verbatimEventDate')).first() + obj = DietSet.objects.filter( + reference=reference, + cited_reference=getattr( + row, + 'associatedReferences'), + taxon=taxon, + location=new_source_location, + sample_size=self.possible_nan_to_zero( + getattr( + row, + 'individualCount')), + time_period=time_period, + method=method, + study_time=getattr( + row, + 'verbatimEventDate')).first() if not obj: - obj = DietSet.objects.create(reference=reference, cited_reference=getattr(row, 'associatedReferences'), taxon=taxon, location=new_source_location, sample_size=self.possible_nan_to_zero( - getattr(row, 'individualCount')), time_period=time_period, method=method, study_time=getattr(row, 'verbatimEventDate'), created_by=author) + obj = DietSet.objects.create( + reference=reference, + cited_reference=getattr( + row, + 'associatedReferences'), + taxon=taxon, + location=new_source_location, + sample_size=self.possible_nan_to_zero( + getattr( + row, + 'individualCount')), + time_period=time_period, + method=method, + study_time=getattr( + row, + 'verbatimEventDate'), + created_by=author) print("Diet set created") verbatim_associated_taxa = str(getattr(row, 'verbatimAssociatedTaxa')) @@ -71,15 +104,19 @@ def importRow(self, row): list_order = getattr(row, 'sequence') percentage = self.possible_nan_to_zero( getattr(row, 'measurementValue')) + diet_set_item = DietSetItem.objects.filter( diet_set=obj, food_item=food_item, percentage=percentage) if diet_set_item.exists(): print("Diet set item link exists") - else: - DietSetItem.objects.create( - diet_set=obj, food_item=food_item, list_order=list_order, percentage=percentage) - print("Diet set item link created") + return False + DietSetItem.objects.create( + diet_set=obj, + food_item=food_item, + list_order=list_order, + percentage=percentage) + print("Diet set item link created") return True def search_food_item(self, query): @@ -87,8 +124,9 @@ def search_food_item(self, query): try: session = CachedSession( - ITIS_CACHE, expire_after=timedelta(days=30), stale_if_error=True) - file = session.get(url+query.lower().capitalize()) + ITIS_CACHE, expire_after=timedelta( + days=30), stale_if_error=True) + file = session.get(url + query.lower().capitalize()) data = file.text except (ConnectionError, UnicodeError): return {'data': [{}]} @@ -101,7 +139,8 @@ def search_food_item(self, query): except json.JSONDecodeError: return {'data': [{}]} return_data = {} - if taxon_data and taxon_data['scientificName'].lower() == query.lower(): + if taxon_data and taxon_data['scientificName'].lower( + ) == query.lower(): tsn = taxon_data['tsn'] scientific_name = taxon_data['scientificName'] return_data = self.create_return_data( @@ -120,9 +159,17 @@ def create_return_data(self, tsn, scientific_name, status='valid'): classification_path = hierarchyToString( scientific_name, hierarchy, 'hierarchyList', 'taxonName') classification_path_ids = hierarchyToString( - tsn, hierarchy, 'hierarchyList', 'tsn', stop_index=classification_path.count("-")) + tsn, + hierarchy, + 'hierarchyList', + 'tsn', + stop_index=classification_path.count("-")) classification_path_ranks = hierarchyToString( - 'Species', hierarchy, 'hierarchyList', 'rankName', stop_index=classification_path.count("-")) + 'Species', + hierarchy, + 'hierarchyList', + 'rankName', + stop_index=classification_path.count("-")) return_data = { 'taxon_id': tsn, 'canonical_form': scientific_name, @@ -158,20 +205,30 @@ def create_tsn(self, results, tsn): rank = TaxonUnitTypes.objects.filter( rank_name=path_rank, kingdom_id=kingdom_id).first().pk - taxonomic_unit = TaxonomicUnits(tsn=tsn, kingdom_id=kingdom_id, rank_id=rank, completename=completename, - hierarchy_string=hierarchy_string, hierarchy=hierarchy, common_names=None, tsn_update_date=None) + taxonomic_unit = TaxonomicUnits( + tsn=tsn, + kingdom_id=kingdom_id, + rank_id=rank, + completename=completename, + hierarchy_string=hierarchy_string, + hierarchy=hierarchy, + common_names=None, + tsn_update_date=None) taxonomic_unit.save() else: taxonomic_unit = taxonomic_unit.first() - if results['data'][0]['results'][0]['taxonomic_status'] in ("invalid", "not accepted"): + if results['data'][0]['results'][0]['taxonomic_status'] in ( + "invalid", "not accepted"): accepted_results = self.get_accepted_tsn(tsn) accepted_taxonomic_unit = self.create_tsn(accepted_results, int( accepted_results['data'][0]['results'][0]['taxon_id'])) sl_qs = SynonymLinks.objects.all().filter(tsn=tsn) if len(sl_qs) == 0: - sl = SynonymLinks(tsn=taxonomic_unit, tsn_accepted=accepted_taxonomic_unit, - tsn_accepted_name=accepted_taxonomic_unit.completename) + sl = SynonymLinks( + tsn=taxonomic_unit, + tsn_accepted=accepted_taxonomic_unit, + tsn_accepted_name=accepted_taxonomic_unit.completename) sl.save() else: sl = sl_qs[0] @@ -188,14 +245,20 @@ def create_fooditem(self, results, food_upper, part): food_item_exists = FoodItem.objects.filter(name__iexact=food_upper) if len(food_item_exists) > 0: return food_item_exists[0] - food_item = FoodItem(name=food_upper, is_cultivar=False, - pa_tsn=taxonomic_unit, part=part, tsn=taxonomic_unit) + food_item = FoodItem( + name=food_upper, + is_cultivar=False, + pa_tsn=taxonomic_unit, + part=part, + tsn=taxonomic_unit) food_item.save() return food_item def generate_rank_id(self, food): associated_taxa = re.sub( - r'\b(?:aff|gen|bot|zoo|ssp|subf|exx|indet|subsp|subvar|var|nothovar|group|forma)\.?|\b\w{1,2}\b|\s*\W', ' ', food).strip().split() + r'\b(?:aff|gen|bot|zoo|ssp|subf|exx|indet|subsp|subvar|var|nothovar|group|forma)\.?|\b\w{1,2}\b|\s*\W', + ' ', + food).strip().split() head = 0 tail = 0 rank_id = {} @@ -209,8 +272,9 @@ def generate_rank_id(self, food): tail += 1 results = self.search_food_item(query) if len(results['data'][0]) > 0: - rank = int(getTaxonomicRankNameFromTSN( - results['data'][0]['results'][0]['taxon_id'])['rankId']) + rank = int( + getTaxonomicRankNameFromTSN( + results['data'][0]['results'][0]['taxon_id'])['rankId']) rank_id[rank] = results break if head >= len(associated_taxa): diff --git a/app/imports/validation_lib/diet_set_validation.py b/app/imports/validation_lib/diet_set_validation.py index cb57ab3..4a3de9e 100644 --- a/app/imports/validation_lib/diet_set_validation.py +++ b/app/imports/validation_lib/diet_set_validation.py @@ -22,7 +22,7 @@ def __init__(self): "verbatimAssociatedTaxa": "max:250", "PartOfOrganism" : "in:BARK,BLOOD,BONES,BUD CARRION,EGGS,EXUDATES,FECES,FLOWER,FRUIT,LARVAE,LEAF,MINERAL,NECTAR/JUICE,NONE,POLLEN,ROOT,SEED,SHOOT,STEM,UNKNOWN,WHOLE", "sequence": "max:250", - "measurementValue": "digis|min:0", + "measurementValue": "digits|min:0", "associatedReferences": "max:250", "references": "required|min:10|max:500|regex:.*([1-2][0-9]{3})", } diff --git a/app/imports/views_wrapper.py b/app/imports/views_wrapper.py index 880c68b..6221d61 100644 --- a/app/imports/views_wrapper.py +++ b/app/imports/views_wrapper.py @@ -43,13 +43,14 @@ def wrapper(request, validator, importer, path): rows_imported, rows_skipped = row_importer(df, importer) if rows_imported > 0: message = (f"File imported successfully. {rows_imported} rows of data were imported." - f"({rows_skipped} rows were skipped.)") + f" ({rows_skipped} rows were skipped.)") messages.add_message(request, 50, message, extra_tags="import-message") messages.add_message(request, 50, df.to_html(), extra_tags="show-data") return HttpResponseRedirect(reverse(path)) - message = f"File failed to import. {rows_imported} rows of data were imported." + message = (f"File failed to import. {rows_imported} rows of data were imported." + f" ({rows_skipped} rows were skipped.)") messages.error(request, message) return HttpResponseRedirect(reverse(path)) From db3553a11e8bcc8014f082c7cf837b43cfdb7f57 Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 19:08:44 +0300 Subject: [PATCH 05/15] Fix nan values --- app/imports/importers/occurrence_importer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/imports/importers/occurrence_importer.py b/app/imports/importers/occurrence_importer.py index 6af5bbd..b87e5f4 100644 --- a/app/imports/importers/occurrence_importer.py +++ b/app/imports/importers/occurrence_importer.py @@ -44,7 +44,7 @@ def importRow(self, row): reference, author ) new_event, created = Event.objects.get_or_create( - verbatim_event_date=getattr(row, 'verbatimEventDate'), + verbatim_event_date=self.possible_nan_to_none(getattr(row, 'verbatimEventDate')), source_habitat=habitat ) print(f"Event created: {new_event}") @@ -76,12 +76,12 @@ def importRow(self, row): event=new_event, source_location=new_source_location, source_entity=verbatim_scientific_name, - organism_quantity=getattr(row, 'organismQuantity'), - organism_quantity_type=getattr(row, 'organismQuantityType'), + organism_quantity=self.possible_nan_to_none(getattr(row, 'organismQuantity')), + organism_quantity_type=self.possible_nan_to_none(getattr(row, 'organismQuantityType')), gender=gender, life_stage=life_stage, - occurrence_remarks=getattr(row, 'occurrenceRemarks'), - associated_references=getattr(row, 'associatedReferences') + occurrence_remarks=self.possible_nan_to_none(getattr(row, 'occurrenceRemarks')), + associated_references=self.possible_nan_to_none(getattr(row, 'associatedReferences')) ) if created: print(f"Occurrence created: {obj}") From ad556c92568ff71b72919fa1b702349b818a4946 Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 19:09:48 +0300 Subject: [PATCH 06/15] Update choice value validation to work with food item part --- app/imports/validation_lib/base_validation.py | 4 ++-- app/imports/validation_lib/diet_set_validation.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/imports/validation_lib/base_validation.py b/app/imports/validation_lib/base_validation.py index 0a03069..6b91f90 100644 --- a/app/imports/validation_lib/base_validation.py +++ b/app/imports/validation_lib/base_validation.py @@ -335,11 +335,11 @@ def validate_choice_value(self, data, field_name, rule): choice_set = str(rule.split(':')[1]) model = str(rule.split(':')[0]) - choicevalue = ChoiceValue.objects.filter(choice_set=choice_set.capitalize(), caption=field_value.capitalize()) + choicevalue = ChoiceValue.objects.filter(choice_set__iexact=choice_set, caption__iexact=field_value) if field_value == 'nan' or field_value == "": return errs - if len(choicevalue) == 0 or field_value.capitalize() != choicevalue[0].caption: + if len(choicevalue) == 0 or field_value.lower() != choicevalue[0].caption.lower(): errs.append(self.return_field_message(model, 'invalid_value').format(value=field_value, field=field_name)) return errs diff --git a/app/imports/validation_lib/diet_set_validation.py b/app/imports/validation_lib/diet_set_validation.py index 4a3de9e..444ba8c 100644 --- a/app/imports/validation_lib/diet_set_validation.py +++ b/app/imports/validation_lib/diet_set_validation.py @@ -20,7 +20,7 @@ def __init__(self): "individualCount": "digits", "verbatimEventDate": "max:250", "verbatimAssociatedTaxa": "max:250", - "PartOfOrganism" : "in:BARK,BLOOD,BONES,BUD CARRION,EGGS,EXUDATES,FECES,FLOWER,FRUIT,LARVAE,LEAF,MINERAL,NECTAR/JUICE,NONE,POLLEN,ROOT,SEED,SHOOT,STEM,UNKNOWN,WHOLE", + "PartOfOrganism" : "choiceValue:fooditempart", "sequence": "max:250", "measurementValue": "digits|min:0", "associatedReferences": "max:250", From 64b7524ef564ec562a9c1d9887ab954930dc7bea Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 19:10:30 +0300 Subject: [PATCH 07/15] Fix index error --- app/imports/views_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/imports/views_wrapper.py b/app/imports/views_wrapper.py index 92c501e..98305c4 100644 --- a/app/imports/views_wrapper.py +++ b/app/imports/views_wrapper.py @@ -117,7 +117,7 @@ def check_author_consistency(df: pd.DataFrame): Returns: bool: True if all authors match the first author, False otherwise. """ - first_author = df['author'].iloc[1] # Get the author value from the first row + first_author = df['author'].iloc[0] # Get the author value from the first row # Compare all author values with the first author author_match = (df['author'] == first_author).all() return author_match From 3cea87cb6f07ad9ce350ed39f1b93ae7ec56d91b Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 19:11:26 +0300 Subject: [PATCH 08/15] Fix small ets import bug found accidentally --- app/imports/importers/ets_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/imports/importers/ets_importer.py b/app/imports/importers/ets_importer.py index fdaa4ab..6c36d87 100644 --- a/app/imports/importers/ets_importer.py +++ b/app/imports/importers/ets_importer.py @@ -170,7 +170,7 @@ def get_choicevalue_ets(self, choice, choice_set): if choice is None or choice == 'nan': return None choiceset_obj = ChoiceValue.objects.filter( - caption=choice.capitalize(), choice_set=choice_set.capitalize()) + caption__iexact=choice, choice_set__iexact=choice_set) if len(choiceset_obj) > 0: return choiceset_obj[0] else: From 8d42f823aa9b18270d1d2f341279ae00bf5988ce Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 19:12:20 +0300 Subject: [PATCH 09/15] Fix choice value and nan values --- app/imports/importers/base_importer.py | 2 +- app/imports/importers/diet_importer.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/app/imports/importers/base_importer.py b/app/imports/importers/base_importer.py index 41ca083..354d68a 100644 --- a/app/imports/importers/base_importer.py +++ b/app/imports/importers/base_importer.py @@ -268,7 +268,7 @@ def possible_nan_to_zero(self, size): return size def possible_nan_to_none(self, possible): - if possible == 'nan': + if possible != possible or possible == 'nan': return None return possible diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py index 2a4c67a..1e8c2e2 100644 --- a/app/imports/importers/diet_importer.py +++ b/app/imports/importers/diet_importer.py @@ -48,14 +48,16 @@ def importRow(self, row): if gender == "nan" or gender == "": gender = None else: - gender = ChoiceValue.objects.get_or_create( - choice_set="Gender", caption=gender.capitalize()) + gender = ChoiceValue.objects.filter( + choice_set="Gender", caption__iexact=gender).first() + if not gender: + part_of_organism = None if part_of_organism == "nan" or part_of_organism == "": part_of_organism = None else: part_of_organism = ChoiceValue.objects.filter( - choice_set="FoodItemPart", caption=part_of_organism.upper()).first() + choice_set="FoodItemPart", caption__iexact=part_of_organism).first() if not part_of_organism: part_of_organism = None @@ -65,6 +67,7 @@ def importRow(self, row): row, 'associatedReferences'), taxon=taxon, + gender=gender, location=new_source_location, sample_size=self.possible_nan_to_zero( getattr( @@ -82,6 +85,7 @@ def importRow(self, row): row, 'associatedReferences'), taxon=taxon, + gender=gender, location=new_source_location, sample_size=self.possible_nan_to_zero( getattr( From e7348e44f484b2349e69f0b539ea08c8dd0944e7 Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 20:01:34 +0300 Subject: [PATCH 10/15] Fix typo --- app/imports/importers/diet_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py index 1e8c2e2..fed50a8 100644 --- a/app/imports/importers/diet_importer.py +++ b/app/imports/importers/diet_importer.py @@ -51,7 +51,7 @@ def importRow(self, row): gender = ChoiceValue.objects.filter( choice_set="Gender", caption__iexact=gender).first() if not gender: - part_of_organism = None + gender = None if part_of_organism == "nan" or part_of_organism == "": part_of_organism = None From 78cc877c98b84fb7c9a6b5df6f515b4279d4f596 Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Wed, 24 Apr 2024 20:24:32 +0300 Subject: [PATCH 11/15] Fix failing tests --- app/tests/imports/test_occurence_import.py | 2 +- app/tests/imports/test_pa_import.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/tests/imports/test_occurence_import.py b/app/tests/imports/test_occurence_import.py index fb58100..c7c0507 100644 --- a/app/tests/imports/test_occurence_import.py +++ b/app/tests/imports/test_occurence_import.py @@ -25,7 +25,7 @@ def test_import_valid_occurences(self): response = self.client.post('/import/occurrences', {'name': 'fred', 'csv_file': fp}) messages = list(get_messages(response.wsgi_request)) self.assertEqual(len(messages), 2) - self.assertEqual(str(messages[0]), 'File imported successfully. 6 rows of data were imported.(0 rows were skipped.)') + self.assertEqual(str(messages[0]), 'File imported successfully. 6 rows of data were imported. (0 rows were skipped.)') self.assertEqual(response.status_code, 302) def test_import_invalid_occurences(self): diff --git a/app/tests/imports/test_pa_import.py b/app/tests/imports/test_pa_import.py index 63d9697..ef42217 100644 --- a/app/tests/imports/test_pa_import.py +++ b/app/tests/imports/test_pa_import.py @@ -24,7 +24,7 @@ def test_import_pa_post_correct_file(self): messages = list(get_messages(response.wsgi_request)) self.assertEqual(len(messages), 2) self.assertEqual( - str(messages[0]), "File imported successfully. 1 rows of data were imported.(1 rows were skipped.)") + str(messages[0]), "File imported successfully. 1 rows of data were imported. (1 rows were skipped.)") self.assertEqual(response.status_code, 302) def test_import_pa_post_incorrect_file(self): From 881a8f5515c1f3a1b31e1bab548890eabd5a5ded Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Thu, 25 Apr 2024 15:39:26 +0300 Subject: [PATCH 12/15] Fix time period --- app/imports/importers/base_importer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/imports/importers/base_importer.py b/app/imports/importers/base_importer.py index 354d68a..b08486f 100644 --- a/app/imports/importers/base_importer.py +++ b/app/imports/importers/base_importer.py @@ -225,9 +225,12 @@ def get_or_create_time_period(self, time_period: str, source_reference: SourceRe """ Return TimePeriod object for the given time_period or create a new one """ + if time_period != time_period or time_period == 'nan' or time_period == "": + return None + time_period = TimePeriod.objects.filter( name__iexact=time_period, reference=source_reference) - if time_period.count() == 1: + if time_period.count() > 0: return time_period[0] new_time_period = TimePeriod( From dc7450de34bf0df80f8069db255faa4095065f1e Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Thu, 25 Apr 2024 18:16:17 +0300 Subject: [PATCH 13/15] Fix another bug in time period --- app/imports/importers/base_importer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/imports/importers/base_importer.py b/app/imports/importers/base_importer.py index b08486f..e84fce4 100644 --- a/app/imports/importers/base_importer.py +++ b/app/imports/importers/base_importer.py @@ -228,10 +228,10 @@ def get_or_create_time_period(self, time_period: str, source_reference: SourceRe if time_period != time_period or time_period == 'nan' or time_period == "": return None - time_period = TimePeriod.objects.filter( + time_period_filtered = TimePeriod.objects.filter( name__iexact=time_period, reference=source_reference) - if time_period.count() > 0: - return time_period[0] + if time_period_filtered.count() == 1: + return time_period_filtered[0] new_time_period = TimePeriod( name=time_period, reference=source_reference, created_by=author) From a970681bae0ea6e3fd4c76db6ff2bcbc227fca6a Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Fri, 26 Apr 2024 15:37:01 +0300 Subject: [PATCH 14/15] Fix base importer bugs --- app/imports/importers/base_importer.py | 99 ++++++++++++++------------ 1 file changed, 53 insertions(+), 46 deletions(-) diff --git a/app/imports/importers/base_importer.py b/app/imports/importers/base_importer.py index e84fce4..aa867c4 100644 --- a/app/imports/importers/base_importer.py +++ b/app/imports/importers/base_importer.py @@ -17,9 +17,6 @@ SourceMethod, ChoiceValue, SourceLocation) -import re -from requests_cache import CachedSession -from datetime import timedelta from config.settings import ITIS_CACHE @@ -54,7 +51,8 @@ def get_author(self, social_id: str): return author[0] raise Exception("Author not found") - def get_master_reference_from_cross_ref(self, citation: str, user_author: User): + def get_master_reference_from_cross_ref( + self, citation: str, user_author: User): """ Gets the master reference from crossref API https://api.crossref.org/swagger-ui/index.htm @@ -111,7 +109,7 @@ def get_or_create_master_reference(self, citation: str, author: User): Return MasterReference object for the given source_reference """ master_reference = MasterReference.objects.filter(citation=citation) - if master_reference.count() == 1: + if master_reference.count() > 0: return master_reference[0] new_master_reference = self.get_master_reference_from_cross_ref( citation, author) @@ -131,7 +129,7 @@ def get_or_create_source_reference(self, citation: str, author: User): source_reference = SourceReference.objects.filter( citation__iexact=citation) - if source_reference.count() == 1: + if source_reference.count() > 0: return source_reference[0] new_reference = SourceReference( @@ -148,7 +146,7 @@ def get_or_create_entity_class(self, taxon_rank: str, author: User): Return EntityClass object for the given taxon_rank or create a new one """ entity_class = EntityClass.objects.filter(name__iexact=taxon_rank) - if entity_class.count() == 1: + if entity_class.count() > 0: return entity_class[0] new_entity_class = EntityClass(name=taxon_rank, created_by=author) new_entity_class.save() @@ -161,7 +159,7 @@ def get_or_create_source_entity(self, name: str, source_reference: SourceReferen """ source_entity = SourceEntity.objects.filter( name__iexact=name, reference=source_reference) - if source_entity.count() == 1: + if source_entity.count() > 0: return source_entity[0] new_source_entity = SourceEntity( name=name, reference=source_reference, created_by=author, entity=entity_class) @@ -178,7 +176,7 @@ def create_entity_relation(self, source_entity): data_status_id=5).filter( master_entity__reference_id=4).filter( relation__name__iexact='Taxon Match') - if found_entity_relation.count() == 1: + if found_entity_relation.count() > 0: EntityRelation(master_entity=found_entity_relation[0].master_entity, source_entity=source_entity, relation=found_entity_relation[0].relation, data_status=found_entity_relation[0].data_status, @@ -193,27 +191,32 @@ def create_and_link_entity_relation_from_api(self, source_entity): """ name = self.search_scientificName(source_entity.name) if name: - master_entity_result = MasterEntity.objects.filter(name=name, entity_id=source_entity.entity_id,reference_id=4) + master_entity_result = MasterEntity.objects.filter( + name=name, entity_id=source_entity.entity_id, reference_id=4) if master_entity_result: - return EntityRelation(master_entity=master_entity_result[0], - source_entity=source_entity, - relation_id=1, - data_status_id=5, - relation_status_id=1, - remarks=master_entity_result[0].reference).save() + return EntityRelation(master_entity=master_entity_result[0], + source_entity=source_entity, + relation_id=1, + data_status_id=5, + relation_status_id=1, + remarks=master_entity_result[0].reference).save() else: - return None - - def get_or_create_source_location(self, location: str, source_reference: SourceReference, author: User): + return None + + def get_or_create_source_location( + self, location: str, source_reference: SourceReference, author: User): """ Return SourceLocation object for the given location or create a new one """ + if location != location or location == 'nan' or location == "": + return None + try: source_location = SourceLocation.objects.filter( name__iexact=location, reference=source_reference) except Exception as error: raise Exception(str(error)) from error - if source_location.count() == 1: + if source_location.count() > 0: return source_location[0] new_source_location = SourceLocation( name=location, reference=source_reference, created_by=author) @@ -227,10 +230,10 @@ def get_or_create_time_period(self, time_period: str, source_reference: SourceRe """ if time_period != time_period or time_period == 'nan' or time_period == "": return None - + time_period_filtered = TimePeriod.objects.filter( name__iexact=time_period, reference=source_reference) - if time_period_filtered.count() == 1: + if time_period_filtered.count() > 0: return time_period_filtered[0] new_time_period = TimePeriod( @@ -243,9 +246,12 @@ def get_or_create_source_method(self, method: str, source_reference: SourceRefer """ Return SourceMethod object for the given method or create a new one """ + if method != method or method == 'nan' or method == "": + return None + source_method = SourceMethod.objects.filter( name__iexact=method, reference=source_reference) - if source_method.count() == 1: + if source_method.count() > 0: return source_method[0] new_source_method = SourceMethod( @@ -276,28 +282,29 @@ def possible_nan_to_none(self, possible): return possible def search_scientificName(self, entity_name): - queries = self.clean_query(entity_name) - url = 'http://www.itis.gov/ITISWebService/jsonservice/getITISTermsFromScientificName?srchKey=' - - try: - session = CachedSession(ITIS_CACHE, expire_after=timedelta(days=30), stale_if_error=True) - for query in queries: - file = session.get(url+query) - data = file.json() - if data['itisTerms'][0] != None: - break - - except (ConnectionError, UnicodeError): - return None - - taxon_data = data['itisTerms'][0] - if taxon_data and taxon_data['scientificName'].lower(): - return taxon_data['scientificName'] - else: - return None + query = self.clean_query(entity_name) + url = 'http://www.itis.gov/ITISWebService/jsonservice/getITISTermsFromScientificName?srchKey=' + try: + session = CachedSession( + ITIS_CACHE, expire_after=timedelta( + days=30), stale_if_error=True) + file = session.get(url + query) + data = file.json() - def clean_query(self, food): - cleaned_food = re.sub(r'\s*\b(sp|ssp|af|aff|gen)\.?|\s*[\(\)\-]', '', food.lower()).capitalize().strip() - parts = cleaned_food.split() - return parts + except (ConnectionError, UnicodeError, json.JSONDecodeError): + return None + + itis_terms = data.get('itisTerms', []) + if itis_terms: + taxon_data = itis_terms[0] + if taxon_data and taxon_data['scientificName'].lower( + ) == query.lower(): + return taxon_data['scientificName'] + return None + def clean_query(self, name): + cleaned_name = re.sub( + r'\b(?:aff|gen|bot|zoo|ssp|subf|exx|indet|subsp|subvar|var|nothovar|group|forma)\.?|\b\w{1,2}\b|\s*\W', + ' ', + name).strip() + return cleaned_name From 611a4e94f6b72353e5e5622d75a0382a1988c694 Mon Sep 17 00:00:00 2001 From: Sanni Tuomisto Date: Fri, 26 Apr 2024 15:37:21 +0300 Subject: [PATCH 15/15] Fix diet import bugs --- app/imports/importers/diet_importer.py | 44 +++++++++++++------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py index fed50a8..65b1268 100644 --- a/app/imports/importers/diet_importer.py +++ b/app/imports/importers/diet_importer.py @@ -19,7 +19,7 @@ class DietImporter(BaseImporter): @transaction.atomic def importRow(self, row): - # Common assignments + # Common assignments for diet set author = self.get_author(getattr(row, 'author')) reference = self.get_or_create_source_reference( getattr(row, 'references'), author) @@ -36,11 +36,22 @@ def importRow(self, row): getattr(row, 'measurementMethod'), reference, author) time_period = self.get_or_create_time_period( (getattr(row, 'samplingEffort')), reference, author) + cited_reference=self.possible_nan_to_none(getattr( + row, + 'associatedReferences')) + sample_size=self.possible_nan_to_zero( + getattr( + row, + 'individualCount')) + study_time=self.possible_nan_to_none(getattr( + row, + 'verbatimEventDate')) - # Create source location model + # Create source location new_source_location = self.get_or_create_source_location( getattr(row, 'verbatimLocality'), reference, author) + # Check choice values gender = str(getattr(row, 'sex')) part_of_organism = str(getattr(row, 'PartOfOrganism')) @@ -61,44 +72,32 @@ def importRow(self, row): if not part_of_organism: part_of_organism = None + # Create diet set obj = DietSet.objects.filter( reference=reference, - cited_reference=getattr( - row, - 'associatedReferences'), + cited_reference=cited_reference, taxon=taxon, gender=gender, location=new_source_location, - sample_size=self.possible_nan_to_zero( - getattr( - row, - 'individualCount')), + sample_size=sample_size, time_period=time_period, method=method, - study_time=getattr( - row, - 'verbatimEventDate')).first() + study_time=study_time).first() if not obj: obj = DietSet.objects.create( reference=reference, - cited_reference=getattr( - row, - 'associatedReferences'), + cited_reference=cited_reference, taxon=taxon, gender=gender, location=new_source_location, - sample_size=self.possible_nan_to_zero( - getattr( - row, - 'individualCount')), + sample_size=sample_size, time_period=time_period, method=method, - study_time=getattr( - row, - 'verbatimEventDate'), + study_time=study_time, created_by=author) print("Diet set created") + # Common assignments for diet set item verbatim_associated_taxa = str(getattr(row, 'verbatimAssociatedTaxa')) if verbatim_associated_taxa == "nan" or verbatim_associated_taxa == "": food_item = None @@ -109,6 +108,7 @@ def importRow(self, row): percentage = self.possible_nan_to_zero( getattr(row, 'measurementValue')) + # Create diet set item diet_set_item = DietSetItem.objects.filter( diet_set=obj, food_item=food_item, percentage=percentage)