From b0eb7f4a141d1a70746f74d2cf29bc270dacec3d Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 12:46:15 +0300
Subject: [PATCH 01/15] Fix import validation

Co-authored-by: Niilo Kurki <niilo.kurki@helsinki.fi>
---
 app/imports/validation_lib/base_validation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/app/imports/validation_lib/base_validation.py b/app/imports/validation_lib/base_validation.py
index a40f0f8..443e936 100644
--- a/app/imports/validation_lib/base_validation.py
+++ b/app/imports/validation_lib/base_validation.py
@@ -271,10 +271,11 @@ def validate_in_fields(self, data, field_name, rule):
         """Used for validating fields for some number of values to allow, returns a list of error messages"""
         #retrieve the value for that in rule
         ls = rule.split(':')[1].split(',')
+        ls = [x.lower() for x in ls]
         errs = []
 
         try:
-            if str(data[field_name]) not in ls:
+            if str(data[field_name]).lower() not in ls:
                 errs.append(self.return_field_message(field_name, "in"))
         except KeyError:
             errs.append(self.return_field_message(field_name,'in'))

From 921e956a9ba8d340672e22792f190ac1b086f9ae Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 12:47:04 +0300
Subject: [PATCH 02/15] Fix diet import validation

---
 app/imports/importers/diet_importer.py         |  2 +-
 .../validation_lib/diet_set_validation.py      | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py
index d1dd93e..4cfa51d 100644
--- a/app/imports/importers/diet_importer.py
+++ b/app/imports/importers/diet_importer.py
@@ -51,7 +51,7 @@ def importRow(self, row):
             part_of_organism = None
         else:
             part_of_organism = ChoiceValue.objects.filter(
-                choice_set="FoodItemPart", caption=part_of_organism).first()
+                choice_set="FoodItemPart", caption=part_of_organism.upper()).first()
             if not part_of_organism:
                 part_of_organism = None
 
diff --git a/app/imports/validation_lib/diet_set_validation.py b/app/imports/validation_lib/diet_set_validation.py
index 4561e52..cb57ab3 100644
--- a/app/imports/validation_lib/diet_set_validation.py
+++ b/app/imports/validation_lib/diet_set_validation.py
@@ -12,18 +12,18 @@ def __init__(self):
         self.rules = {
             "author":                       "required|author",
             "verbatimScientificName":       "required|alpha|max:500",
-            "taxonRank":                    "in:Subspecies,Varietas,Forma,Species,Genus,Nothogenus,Nothospecies,Nothosubspecies,Family,nan,species,subspecies,varietas,forma,family,genus,nothogenus,nothospecies,nothosubspecies,SPECIES,SUBSPECIES,VARIETAS,FORMA,FAMILY,GENUS,NOTHOSPECIES,NOTHOSUBSPECIES,NOTHOSPECIES,NOTHOSUBSPECIES",
+            "taxonRank":                    "in:Subspecies,Varietas,Forma,Species,Genus,Nothogenus,Nothospecies,Nothosubspecies,Family,nan",
             "verbatimLocality":             "max:250",
-            "habitat":                     "max:250",
-            "samplingEffort":              "max:250",
+            "habitat":                      "max:250",
+            "samplingEffort":               "max:250",
             "sex":                          "choiceValue:gender",
-            "individualCount":             "digits",
+            "individualCount":              "digits",
             "verbatimEventDate":            "max:250",
-            "verbatimAssociatedTaxa":      "max:250",
-            # "PartOfOrganism" :              "in:BARK,BLOOD,BONES,BUD CARRION,EGGS,EXUDATES,FECES,FLOWER,FRUIT,LARVAE,LEAF,MINERAL,NECTAR/JUICE,NONE,POLLEN,ROOT,SEED,SHOOT,STEM,UNKNOWN,WHOLE",
-            "sequence":                    "max:250",
-            "measurementValue":            "digis|min:0",
-            "associatedReferences":        "max:250",
+            "verbatimAssociatedTaxa":       "max:250",
+            "PartOfOrganism" :              "in:BARK,BLOOD,BONES,BUD CARRION,EGGS,EXUDATES,FECES,FLOWER,FRUIT,LARVAE,LEAF,MINERAL,NECTAR/JUICE,NONE,POLLEN,ROOT,SEED,SHOOT,STEM,UNKNOWN,WHOLE",
+            "sequence":                     "max:250",
+            "measurementValue":             "digis|min:0",
+            "associatedReferences":         "max:250",
             "references":                   "required|min:10|max:500|regex:.*([1-2][0-9]{3})",
         }
 

From 0df8de74e894536cef339010f833cd04d147ffe5 Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 13:57:32 +0300
Subject: [PATCH 03/15] Fix validation message

Co-authored-by: Niilo Kurki <niilo.kurki@helsinki.fi>
---
 app/imports/validation_lib/base_validation.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/imports/validation_lib/base_validation.py b/app/imports/validation_lib/base_validation.py
index 443e936..b76b853 100644
--- a/app/imports/validation_lib/base_validation.py
+++ b/app/imports/validation_lib/base_validation.py
@@ -222,6 +222,10 @@ def validate_boolean_fields(self, data, field_name):
     def validate_digit_fields(self, data, field_name):
         """Used for validating integer fields, returns a list of error messages"""
         errs = []
+        # A comma means the wrong decimal separator was used ('.' is expected); report it and return early
+        if ',' in str(data[field_name]):
+            errs.append(self.return_field_message("digits", "not_decimal").format(value=data[field_name]))
+            return errs
 
         try:
             if not isinstance(float(data[field_name]),(int, float)) or data[field_name] == "nan" or data[field_name] == "":
@@ -478,7 +482,7 @@ def get_error_message_templates(self):
             "boolean": "'%s' has invalid value for boolean field",
             "required": "'%s' must be filled",
             "alpha": "'%s' can have only alphabets",
-            "digits": "'%s' must be an integer",
+            "digits": "'%s' must be an number",
             "author": "'%s' field must follow the following format: 0000-0000-0000-0000",
             "max": "The maximum value for the field '%s' is invalid",
             "min": "The minimum value for the field '%s' is invalid",
@@ -518,4 +522,5 @@ def get_custom_error_messages(self):
             "active.no_field":"You did not provide any field named active in your data dictionary",
             "age.no_field":"You did not provide any field named age in your data dictionary",
             "choiceValue.invalid_value":"'{value}' is invalid value for {field} field",
+            "digits.not_decimal":"'{value}' is not a decimal number. Use . for decimal separator",
         }
\ No newline at end of file

From 5ce01792adf13a0db24bad95fe76905c3ebb98dc Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 14:00:36 +0300
Subject: [PATCH 04/15] Fix import message and validation and formatting

---
 app/imports/importers/diet_importer.py        | 116 ++++++++++++++----
 .../validation_lib/diet_set_validation.py     |   2 +-
 app/imports/views_wrapper.py                  |   5 +-
 3 files changed, 94 insertions(+), 29 deletions(-)

diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py
index 4cfa51d..2a4c67a 100644
--- a/app/imports/importers/diet_importer.py
+++ b/app/imports/importers/diet_importer.py
@@ -13,7 +13,6 @@
 from .base_importer import BaseImporter
 
 
-
 class DietImporter(BaseImporter):
     """Class for diet importer
     """
@@ -27,7 +26,12 @@ def importRow(self, row):
         entityclass = self.get_or_create_entity_class(
             getattr(row, 'taxonRank'), author)
         taxon = self.get_or_create_source_entity(
-            getattr(row, 'verbatimScientificName'), reference, entityclass, author)
+            getattr(
+                row,
+                'verbatimScientificName'),
+            reference,
+            entityclass,
+            author)
         method = self.get_or_create_source_method(
             getattr(row, 'measurementMethod'), reference, author)
         time_period = self.get_or_create_time_period(
@@ -44,7 +48,7 @@ def importRow(self, row):
         if gender == "nan" or gender == "":
             gender = None
         else:
-            gender, created = ChoiceValue.objects.get_or_create(
+            gender = ChoiceValue.objects.get_or_create(
                 choice_set="Gender", caption=gender.capitalize())
 
         if part_of_organism == "nan" or part_of_organism == "":
@@ -55,11 +59,40 @@ def importRow(self, row):
             if not part_of_organism:
                 part_of_organism = None
 
-        obj = DietSet.objects.filter(reference=reference, cited_reference=getattr(row, 'associatedReferences'), taxon=taxon, location=new_source_location, sample_size=self.possible_nan_to_zero(getattr(row, 'individualCount')),
-                                     time_period=time_period, method=method, study_time=getattr(row, 'verbatimEventDate')).first()
+        obj = DietSet.objects.filter(
+            reference=reference,
+            cited_reference=getattr(
+                row,
+                'associatedReferences'),
+            taxon=taxon,
+            location=new_source_location,
+            sample_size=self.possible_nan_to_zero(
+                getattr(
+                    row,
+                    'individualCount')),
+            time_period=time_period,
+            method=method,
+            study_time=getattr(
+                row,
+                'verbatimEventDate')).first()
         if not obj:
-            obj = DietSet.objects.create(reference=reference, cited_reference=getattr(row, 'associatedReferences'), taxon=taxon, location=new_source_location, sample_size=self.possible_nan_to_zero(
-                getattr(row, 'individualCount')),                                       time_period=time_period, method=method, study_time=getattr(row, 'verbatimEventDate'), created_by=author)
+            obj = DietSet.objects.create(
+                reference=reference,
+                cited_reference=getattr(
+                    row,
+                    'associatedReferences'),
+                taxon=taxon,
+                location=new_source_location,
+                sample_size=self.possible_nan_to_zero(
+                    getattr(
+                        row,
+                        'individualCount')),
+                time_period=time_period,
+                method=method,
+                study_time=getattr(
+                    row,
+                    'verbatimEventDate'),
+                created_by=author)
             print("Diet set created")
 
         verbatim_associated_taxa = str(getattr(row, 'verbatimAssociatedTaxa'))
@@ -71,15 +104,19 @@ def importRow(self, row):
         list_order = getattr(row, 'sequence')
         percentage = self.possible_nan_to_zero(
             getattr(row, 'measurementValue'))
+
         diet_set_item = DietSetItem.objects.filter(
             diet_set=obj, food_item=food_item, percentage=percentage)
 
         if diet_set_item.exists():
             print("Diet set item link exists")
-        else:
-            DietSetItem.objects.create(
-                diet_set=obj, food_item=food_item, list_order=list_order, percentage=percentage)
-            print("Diet set item link created")
+            return False
+        DietSetItem.objects.create(
+            diet_set=obj,
+            food_item=food_item,
+            list_order=list_order,
+            percentage=percentage)
+        print("Diet set item link created")
         return True
 
     def search_food_item(self, query):
@@ -87,8 +124,9 @@ def search_food_item(self, query):
 
         try:
             session = CachedSession(
-                ITIS_CACHE, expire_after=timedelta(days=30), stale_if_error=True)
-            file = session.get(url+query.lower().capitalize())
+                ITIS_CACHE, expire_after=timedelta(
+                    days=30), stale_if_error=True)
+            file = session.get(url + query.lower().capitalize())
             data = file.text
         except (ConnectionError, UnicodeError):
             return {'data': [{}]}
@@ -101,7 +139,8 @@ def search_food_item(self, query):
         except json.JSONDecodeError:
             return {'data': [{}]}
         return_data = {}
-        if taxon_data and taxon_data['scientificName'].lower() == query.lower():
+        if taxon_data and taxon_data['scientificName'].lower(
+        ) == query.lower():
             tsn = taxon_data['tsn']
             scientific_name = taxon_data['scientificName']
             return_data = self.create_return_data(
@@ -120,9 +159,17 @@ def create_return_data(self, tsn, scientific_name, status='valid'):
             classification_path = hierarchyToString(
                 scientific_name, hierarchy, 'hierarchyList', 'taxonName')
             classification_path_ids = hierarchyToString(
-                tsn, hierarchy, 'hierarchyList', 'tsn', stop_index=classification_path.count("-"))
+                tsn,
+                hierarchy,
+                'hierarchyList',
+                'tsn',
+                stop_index=classification_path.count("-"))
             classification_path_ranks = hierarchyToString(
-                'Species', hierarchy, 'hierarchyList', 'rankName', stop_index=classification_path.count("-"))
+                'Species',
+                hierarchy,
+                'hierarchyList',
+                'rankName',
+                stop_index=classification_path.count("-"))
         return_data = {
             'taxon_id': tsn,
             'canonical_form': scientific_name,
@@ -158,20 +205,30 @@ def create_tsn(self, results, tsn):
                 rank = TaxonUnitTypes.objects.filter(
                     rank_name=path_rank, kingdom_id=kingdom_id).first().pk
 
-            taxonomic_unit = TaxonomicUnits(tsn=tsn, kingdom_id=kingdom_id, rank_id=rank, completename=completename,
-                                            hierarchy_string=hierarchy_string, hierarchy=hierarchy, common_names=None, tsn_update_date=None)
+            taxonomic_unit = TaxonomicUnits(
+                tsn=tsn,
+                kingdom_id=kingdom_id,
+                rank_id=rank,
+                completename=completename,
+                hierarchy_string=hierarchy_string,
+                hierarchy=hierarchy,
+                common_names=None,
+                tsn_update_date=None)
             taxonomic_unit.save()
         else:
             taxonomic_unit = taxonomic_unit.first()
 
-        if results['data'][0]['results'][0]['taxonomic_status'] in ("invalid", "not accepted"):
+        if results['data'][0]['results'][0]['taxonomic_status'] in (
+                "invalid", "not accepted"):
             accepted_results = self.get_accepted_tsn(tsn)
             accepted_taxonomic_unit = self.create_tsn(accepted_results, int(
                 accepted_results['data'][0]['results'][0]['taxon_id']))
             sl_qs = SynonymLinks.objects.all().filter(tsn=tsn)
             if len(sl_qs) == 0:
-                sl = SynonymLinks(tsn=taxonomic_unit, tsn_accepted=accepted_taxonomic_unit,
-                                  tsn_accepted_name=accepted_taxonomic_unit.completename)
+                sl = SynonymLinks(
+                    tsn=taxonomic_unit,
+                    tsn_accepted=accepted_taxonomic_unit,
+                    tsn_accepted_name=accepted_taxonomic_unit.completename)
                 sl.save()
             else:
                 sl = sl_qs[0]
@@ -188,14 +245,20 @@ def create_fooditem(self, results, food_upper, part):
         food_item_exists = FoodItem.objects.filter(name__iexact=food_upper)
         if len(food_item_exists) > 0:
             return food_item_exists[0]
-        food_item = FoodItem(name=food_upper, is_cultivar=False,
-                             pa_tsn=taxonomic_unit, part=part, tsn=taxonomic_unit)
+        food_item = FoodItem(
+            name=food_upper,
+            is_cultivar=False,
+            pa_tsn=taxonomic_unit,
+            part=part,
+            tsn=taxonomic_unit)
         food_item.save()
         return food_item
 
     def generate_rank_id(self, food):
         associated_taxa = re.sub(
-            r'\b(?:aff|gen|bot|zoo|ssp|subf|exx|indet|subsp|subvar|var|nothovar|group|forma)\.?|\b\w{1,2}\b|\s*\W', ' ', food).strip().split()
+            r'\b(?:aff|gen|bot|zoo|ssp|subf|exx|indet|subsp|subvar|var|nothovar|group|forma)\.?|\b\w{1,2}\b|\s*\W',
+            ' ',
+            food).strip().split()
         head = 0
         tail = 0
         rank_id = {}
@@ -209,8 +272,9 @@ def generate_rank_id(self, food):
                     tail += 1
                 results = self.search_food_item(query)
                 if len(results['data'][0]) > 0:
-                    rank = int(getTaxonomicRankNameFromTSN(
-                        results['data'][0]['results'][0]['taxon_id'])['rankId'])
+                    rank = int(
+                        getTaxonomicRankNameFromTSN(
+                            results['data'][0]['results'][0]['taxon_id'])['rankId'])
                     rank_id[rank] = results
                     break
                 if head >= len(associated_taxa):
diff --git a/app/imports/validation_lib/diet_set_validation.py b/app/imports/validation_lib/diet_set_validation.py
index cb57ab3..4a3de9e 100644
--- a/app/imports/validation_lib/diet_set_validation.py
+++ b/app/imports/validation_lib/diet_set_validation.py
@@ -22,7 +22,7 @@ def __init__(self):
             "verbatimAssociatedTaxa":       "max:250",
             "PartOfOrganism" :              "in:BARK,BLOOD,BONES,BUD CARRION,EGGS,EXUDATES,FECES,FLOWER,FRUIT,LARVAE,LEAF,MINERAL,NECTAR/JUICE,NONE,POLLEN,ROOT,SEED,SHOOT,STEM,UNKNOWN,WHOLE",
             "sequence":                     "max:250",
-            "measurementValue":             "digis|min:0",
+            "measurementValue":             "digits|min:0",
             "associatedReferences":         "max:250",
             "references":                   "required|min:10|max:500|regex:.*([1-2][0-9]{3})",
         }
diff --git a/app/imports/views_wrapper.py b/app/imports/views_wrapper.py
index 880c68b..6221d61 100644
--- a/app/imports/views_wrapper.py
+++ b/app/imports/views_wrapper.py
@@ -43,13 +43,14 @@ def wrapper(request, validator, importer, path):
         rows_imported, rows_skipped = row_importer(df, importer)
         if rows_imported > 0:
             message = (f"File imported successfully. {rows_imported} rows of data were imported."
-                       f"({rows_skipped} rows were skipped.)")
+                       f" ({rows_skipped} rows were skipped.)")
             messages.add_message(request, 50, message, extra_tags="import-message")
             messages.add_message(request, 50, df.to_html(), extra_tags="show-data")
             return HttpResponseRedirect(reverse(path))
 
 
-        message = f"File failed to import. {rows_imported} rows of data were imported."
+        message = (f"File failed to import. {rows_imported} rows of data were imported."
+                   f" ({rows_skipped} rows were skipped.)")
         messages.error(request, message)
         return HttpResponseRedirect(reverse(path))
 

From db3553a11e8bcc8014f082c7cf837b43cfdb7f57 Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 19:08:44 +0300
Subject: [PATCH 05/15] Fix nan values

---
 app/imports/importers/occurrence_importer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/imports/importers/occurrence_importer.py b/app/imports/importers/occurrence_importer.py
index 6af5bbd..b87e5f4 100644
--- a/app/imports/importers/occurrence_importer.py
+++ b/app/imports/importers/occurrence_importer.py
@@ -44,7 +44,7 @@ def importRow(self, row):
             reference, author
         )
         new_event, created = Event.objects.get_or_create(
-            verbatim_event_date=getattr(row, 'verbatimEventDate'),
+            verbatim_event_date=self.possible_nan_to_none(getattr(row, 'verbatimEventDate')),
             source_habitat=habitat
         )
         print(f"Event created: {new_event}")
@@ -76,12 +76,12 @@ def importRow(self, row):
             event=new_event,
             source_location=new_source_location,
             source_entity=verbatim_scientific_name,
-            organism_quantity=getattr(row, 'organismQuantity'),
-            organism_quantity_type=getattr(row, 'organismQuantityType'),
+            organism_quantity=self.possible_nan_to_none(getattr(row, 'organismQuantity')),
+            organism_quantity_type=self.possible_nan_to_none(getattr(row, 'organismQuantityType')),
             gender=gender,
             life_stage=life_stage,
-            occurrence_remarks=getattr(row, 'occurrenceRemarks'),
-            associated_references=getattr(row, 'associatedReferences')
+            occurrence_remarks=self.possible_nan_to_none(getattr(row, 'occurrenceRemarks')),
+            associated_references=self.possible_nan_to_none(getattr(row, 'associatedReferences'))
         )
         if created:
             print(f"Occurrence created: {obj}")

From ad556c92568ff71b72919fa1b702349b818a4946 Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 19:09:48 +0300
Subject: [PATCH 06/15] Update choice value validation to work with food item
 part

---
 app/imports/validation_lib/base_validation.py     | 4 ++--
 app/imports/validation_lib/diet_set_validation.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/imports/validation_lib/base_validation.py b/app/imports/validation_lib/base_validation.py
index 0a03069..6b91f90 100644
--- a/app/imports/validation_lib/base_validation.py
+++ b/app/imports/validation_lib/base_validation.py
@@ -335,11 +335,11 @@ def validate_choice_value(self, data, field_name, rule):
         choice_set = str(rule.split(':')[1])
         model = str(rule.split(':')[0])
 
-        choicevalue = ChoiceValue.objects.filter(choice_set=choice_set.capitalize(), caption=field_value.capitalize())
+        choicevalue = ChoiceValue.objects.filter(choice_set__iexact=choice_set, caption__iexact=field_value)
 
         if field_value == 'nan' or field_value == "":
             return errs
-        if len(choicevalue) == 0 or field_value.capitalize() != choicevalue[0].caption:
+        if len(choicevalue) == 0 or field_value.lower() != choicevalue[0].caption.lower():
             errs.append(self.return_field_message(model, 'invalid_value').format(value=field_value, field=field_name))
         return errs
 
diff --git a/app/imports/validation_lib/diet_set_validation.py b/app/imports/validation_lib/diet_set_validation.py
index 4a3de9e..444ba8c 100644
--- a/app/imports/validation_lib/diet_set_validation.py
+++ b/app/imports/validation_lib/diet_set_validation.py
@@ -20,7 +20,7 @@ def __init__(self):
             "individualCount":              "digits",
             "verbatimEventDate":            "max:250",
             "verbatimAssociatedTaxa":       "max:250",
-            "PartOfOrganism" :              "in:BARK,BLOOD,BONES,BUD CARRION,EGGS,EXUDATES,FECES,FLOWER,FRUIT,LARVAE,LEAF,MINERAL,NECTAR/JUICE,NONE,POLLEN,ROOT,SEED,SHOOT,STEM,UNKNOWN,WHOLE",
+            "PartOfOrganism" :              "choiceValue:fooditempart",
             "sequence":                     "max:250",
             "measurementValue":             "digits|min:0",
             "associatedReferences":         "max:250",

From 64b7524ef564ec562a9c1d9887ab954930dc7bea Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 19:10:30 +0300
Subject: [PATCH 07/15] Fix index error

---
 app/imports/views_wrapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/imports/views_wrapper.py b/app/imports/views_wrapper.py
index 92c501e..98305c4 100644
--- a/app/imports/views_wrapper.py
+++ b/app/imports/views_wrapper.py
@@ -117,7 +117,7 @@ def check_author_consistency(df: pd.DataFrame):
     Returns:
         bool: True if all authors match the first author, False otherwise.
     """
-    first_author = df['author'].iloc[1]  # Get the author value from the first row
+    first_author = df['author'].iloc[0]  # Get the author value from the first row
     # Compare all author values with the first author
     author_match = (df['author'] == first_author).all()
     return author_match

From 3cea87cb6f07ad9ce350ed39f1b93ae7ec56d91b Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 19:11:26 +0300
Subject: [PATCH 08/15] Fix small ets import bug found accidentally

---
 app/imports/importers/ets_importer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/imports/importers/ets_importer.py b/app/imports/importers/ets_importer.py
index fdaa4ab..6c36d87 100644
--- a/app/imports/importers/ets_importer.py
+++ b/app/imports/importers/ets_importer.py
@@ -170,7 +170,7 @@ def get_choicevalue_ets(self, choice, choice_set):
         if choice is None or choice == 'nan':
             return None
         choiceset_obj = ChoiceValue.objects.filter(
-            caption=choice.capitalize(), choice_set=choice_set.capitalize())
+            caption__iexact=choice, choice_set__iexact=choice_set)
         if len(choiceset_obj) > 0:
             return choiceset_obj[0]
         else:

From 8d42f823aa9b18270d1d2f341279ae00bf5988ce Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 19:12:20 +0300
Subject: [PATCH 09/15] Fix choice value and nan values

---
 app/imports/importers/base_importer.py |  2 +-
 app/imports/importers/diet_importer.py | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/app/imports/importers/base_importer.py b/app/imports/importers/base_importer.py
index 41ca083..354d68a 100644
--- a/app/imports/importers/base_importer.py
+++ b/app/imports/importers/base_importer.py
@@ -268,7 +268,7 @@ def possible_nan_to_zero(self, size):
         return size
 
     def possible_nan_to_none(self, possible):
-        if possible == 'nan':
+        if possible != possible or possible == 'nan':
             return None
         return possible
 
diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py
index 2a4c67a..1e8c2e2 100644
--- a/app/imports/importers/diet_importer.py
+++ b/app/imports/importers/diet_importer.py
@@ -48,14 +48,16 @@ def importRow(self, row):
         if gender == "nan" or gender == "":
             gender = None
         else:
-            gender = ChoiceValue.objects.get_or_create(
-                choice_set="Gender", caption=gender.capitalize())
+            gender = ChoiceValue.objects.filter(
+                choice_set="Gender", caption__iexact=gender).first()
+            if not gender:
+                part_of_organism = None
 
         if part_of_organism == "nan" or part_of_organism == "":
             part_of_organism = None
         else:
             part_of_organism = ChoiceValue.objects.filter(
-                choice_set="FoodItemPart", caption=part_of_organism.upper()).first()
+                choice_set="FoodItemPart", caption__iexact=part_of_organism).first()
             if not part_of_organism:
                 part_of_organism = None
 
@@ -65,6 +67,7 @@ def importRow(self, row):
                 row,
                 'associatedReferences'),
             taxon=taxon,
+            gender=gender,
             location=new_source_location,
             sample_size=self.possible_nan_to_zero(
                 getattr(
@@ -82,6 +85,7 @@ def importRow(self, row):
                     row,
                     'associatedReferences'),
                 taxon=taxon,
+                gender=gender,
                 location=new_source_location,
                 sample_size=self.possible_nan_to_zero(
                     getattr(

From e7348e44f484b2349e69f0b539ea08c8dd0944e7 Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 20:01:34 +0300
Subject: [PATCH 10/15] Fix typo

---
 app/imports/importers/diet_importer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py
index 1e8c2e2..fed50a8 100644
--- a/app/imports/importers/diet_importer.py
+++ b/app/imports/importers/diet_importer.py
@@ -51,7 +51,7 @@ def importRow(self, row):
             gender = ChoiceValue.objects.filter(
                 choice_set="Gender", caption__iexact=gender).first()
             if not gender:
-                part_of_organism = None
+                gender = None
 
         if part_of_organism == "nan" or part_of_organism == "":
             part_of_organism = None

From 78cc877c98b84fb7c9a6b5df6f515b4279d4f596 Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Wed, 24 Apr 2024 20:24:32 +0300
Subject: [PATCH 11/15] Fix failing tests

---
 app/tests/imports/test_occurence_import.py | 2 +-
 app/tests/imports/test_pa_import.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/tests/imports/test_occurence_import.py b/app/tests/imports/test_occurence_import.py
index fb58100..c7c0507 100644
--- a/app/tests/imports/test_occurence_import.py
+++ b/app/tests/imports/test_occurence_import.py
@@ -25,7 +25,7 @@ def test_import_valid_occurences(self):
             response = self.client.post('/import/occurrences', {'name': 'fred', 'csv_file': fp})
         messages = list(get_messages(response.wsgi_request))
         self.assertEqual(len(messages), 2)
-        self.assertEqual(str(messages[0]), 'File imported successfully. 6 rows of data were imported.(0 rows were skipped.)')
+        self.assertEqual(str(messages[0]), 'File imported successfully. 6 rows of data were imported. (0 rows were skipped.)')
         self.assertEqual(response.status_code, 302)
 
     def test_import_invalid_occurences(self):
diff --git a/app/tests/imports/test_pa_import.py b/app/tests/imports/test_pa_import.py
index 63d9697..ef42217 100644
--- a/app/tests/imports/test_pa_import.py
+++ b/app/tests/imports/test_pa_import.py
@@ -24,7 +24,7 @@ def test_import_pa_post_correct_file(self):
         messages = list(get_messages(response.wsgi_request))
         self.assertEqual(len(messages), 2)
         self.assertEqual(
-            str(messages[0]), "File imported successfully. 1 rows of data were imported.(1 rows were skipped.)")
+            str(messages[0]), "File imported successfully. 1 rows of data were imported. (1 rows were skipped.)")
         self.assertEqual(response.status_code, 302)
 
     def test_import_pa_post_incorrect_file(self):

From 881a8f5515c1f3a1b31e1bab548890eabd5a5ded Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Thu, 25 Apr 2024 15:39:26 +0300
Subject: [PATCH 12/15] Fix time period

---
 app/imports/importers/base_importer.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/imports/importers/base_importer.py b/app/imports/importers/base_importer.py
index 354d68a..b08486f 100644
--- a/app/imports/importers/base_importer.py
+++ b/app/imports/importers/base_importer.py
@@ -225,9 +225,12 @@ def get_or_create_time_period(self, time_period: str, source_reference: SourceRe
         """
         Return TimePeriod object for the given time_period or create a new one
         """
+        if time_period != time_period or time_period == 'nan' or time_period == "":
+            return None
+        
         time_period = TimePeriod.objects.filter(
             name__iexact=time_period, reference=source_reference)
-        if time_period.count() == 1:
+        if time_period.count() > 0:
             return time_period[0]
 
         new_time_period = TimePeriod(

From dc7450de34bf0df80f8069db255faa4095065f1e Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Thu, 25 Apr 2024 18:16:17 +0300
Subject: [PATCH 13/15] Fix time period lookup shadowing its time_period argument

---
 app/imports/importers/base_importer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/imports/importers/base_importer.py b/app/imports/importers/base_importer.py
index b08486f..e84fce4 100644
--- a/app/imports/importers/base_importer.py
+++ b/app/imports/importers/base_importer.py
@@ -228,10 +228,10 @@ def get_or_create_time_period(self, time_period: str, source_reference: SourceRe
         if time_period != time_period or time_period == 'nan' or time_period == "":
             return None
         
-        time_period = TimePeriod.objects.filter(
+        time_period_filtered = TimePeriod.objects.filter(
             name__iexact=time_period, reference=source_reference)
-        if time_period.count() > 0:
-            return time_period[0]
+        if time_period_filtered.count() == 1:
+            return time_period_filtered[0]
 
         new_time_period = TimePeriod(
             name=time_period, reference=source_reference, created_by=author)

From a970681bae0ea6e3fd4c76db6ff2bcbc227fca6a Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Fri, 26 Apr 2024 15:37:01 +0300
Subject: [PATCH 14/15] Fix base importer lookups for empty values, duplicates and ITIS matching

---
 app/imports/importers/base_importer.py | 99 ++++++++++++++------------
 1 file changed, 53 insertions(+), 46 deletions(-)

diff --git a/app/imports/importers/base_importer.py b/app/imports/importers/base_importer.py
index e84fce4..aa867c4 100644
--- a/app/imports/importers/base_importer.py
+++ b/app/imports/importers/base_importer.py
@@ -17,9 +17,6 @@
     SourceMethod,
     ChoiceValue,
     SourceLocation)
-import re
-from requests_cache import CachedSession
-from datetime import timedelta
 from config.settings import ITIS_CACHE
 
 
@@ -54,7 +51,8 @@ def get_author(self, social_id: str):
             return author[0]
         raise Exception("Author not found")
 
-    def get_master_reference_from_cross_ref(self, citation: str, user_author: User):
+    def get_master_reference_from_cross_ref(
+            self, citation: str, user_author: User):
         """
         Gets the master reference from crossref API
         https://api.crossref.org/swagger-ui/index.htm
@@ -111,7 +109,7 @@ def get_or_create_master_reference(self, citation: str, author: User):
         Return MasterReference object for the given source_reference
         """
         master_reference = MasterReference.objects.filter(citation=citation)
-        if master_reference.count() == 1:
+        if master_reference.count() > 0:
             return master_reference[0]
         new_master_reference = self.get_master_reference_from_cross_ref(
             citation, author)
@@ -131,7 +129,7 @@ def get_or_create_source_reference(self, citation: str, author: User):
         source_reference = SourceReference.objects.filter(
             citation__iexact=citation)
 
-        if source_reference.count() == 1:
+        if source_reference.count() > 0:
             return source_reference[0]
 
         new_reference = SourceReference(
@@ -148,7 +146,7 @@ def get_or_create_entity_class(self, taxon_rank: str, author: User):
         Return EntityClass object for the given taxon_rank or create a new one
         """
         entity_class = EntityClass.objects.filter(name__iexact=taxon_rank)
-        if entity_class.count() == 1:
+        if entity_class.count() > 0:
             return entity_class[0]
         new_entity_class = EntityClass(name=taxon_rank, created_by=author)
         new_entity_class.save()
@@ -161,7 +159,7 @@ def get_or_create_source_entity(self, name: str, source_reference: SourceReferen
         """
         source_entity = SourceEntity.objects.filter(
             name__iexact=name, reference=source_reference)
-        if source_entity.count() == 1:
+        if source_entity.count() > 0:
             return source_entity[0]
         new_source_entity = SourceEntity(
             name=name, reference=source_reference, created_by=author, entity=entity_class)
@@ -178,7 +176,7 @@ def create_entity_relation(self, source_entity):
             data_status_id=5).filter(
             master_entity__reference_id=4).filter(
             relation__name__iexact='Taxon Match')
-        if found_entity_relation.count() == 1:
+        if found_entity_relation.count() > 0:
             EntityRelation(master_entity=found_entity_relation[0].master_entity,
                            source_entity=source_entity, relation=found_entity_relation[0].relation,
                            data_status=found_entity_relation[0].data_status,
@@ -193,27 +191,32 @@ def create_and_link_entity_relation_from_api(self, source_entity):
         """
         name = self.search_scientificName(source_entity.name)
         if name:
-            master_entity_result = MasterEntity.objects.filter(name=name, entity_id=source_entity.entity_id,reference_id=4)
+            master_entity_result = MasterEntity.objects.filter(
+                name=name, entity_id=source_entity.entity_id, reference_id=4)
             if master_entity_result:
-               return EntityRelation(master_entity=master_entity_result[0],
-                                source_entity=source_entity,
-                                relation_id=1,
-                                data_status_id=5,
-                                relation_status_id=1,
-                                remarks=master_entity_result[0].reference).save()
+                return EntityRelation(master_entity=master_entity_result[0],
+                                      source_entity=source_entity,
+                                      relation_id=1,
+                                      data_status_id=5,
+                                      relation_status_id=1,
+                                      remarks=master_entity_result[0].reference).save()
         else:
-            return None    
-    
-    def get_or_create_source_location(self, location: str, source_reference: SourceReference, author: User):
+            return None
+
+    def get_or_create_source_location(
+            self, location: str, source_reference: SourceReference, author: User):
         """
         Return SourceLocation object for the given location or create a new one
         """
+        if location != location or location == 'nan' or location == "":
+            return None
+
         try:
             source_location = SourceLocation.objects.filter(
                 name__iexact=location, reference=source_reference)
         except Exception as error:
             raise Exception(str(error)) from error
-        if source_location.count() == 1:
+        if source_location.count() > 0:
             return source_location[0]
         new_source_location = SourceLocation(
             name=location, reference=source_reference, created_by=author)
@@ -227,10 +230,10 @@ def get_or_create_time_period(self, time_period: str, source_reference: SourceRe
         """
         if time_period != time_period or time_period == 'nan' or time_period == "":
             return None
-        
+
         time_period_filtered = TimePeriod.objects.filter(
             name__iexact=time_period, reference=source_reference)
-        if time_period_filtered.count() == 1:
+        if time_period_filtered.count() > 0:
             return time_period_filtered[0]
 
         new_time_period = TimePeriod(
@@ -243,9 +246,12 @@ def get_or_create_source_method(self, method: str, source_reference: SourceRefer
         """
         Return SourceMethod object for the given method or create a new one
         """
+        if method != method or method == 'nan' or method == "":
+            return None
+
         source_method = SourceMethod.objects.filter(
             name__iexact=method, reference=source_reference)
-        if source_method.count() == 1:
+        if source_method.count() > 0:
             return source_method[0]
 
         new_source_method = SourceMethod(
@@ -276,28 +282,29 @@ def possible_nan_to_none(self, possible):
         return possible
 
     def search_scientificName(self, entity_name):
-            queries = self.clean_query(entity_name)
-            url = 'http://www.itis.gov/ITISWebService/jsonservice/getITISTermsFromScientificName?srchKey='
-            
-            try:
-                session = CachedSession(ITIS_CACHE, expire_after=timedelta(days=30), stale_if_error=True)
-                for query in queries:
-                    file = session.get(url+query)
-                    data = file.json()
-                    if data['itisTerms'][0] != None:
-                        break
-                    
-            except (ConnectionError, UnicodeError):
-                return None
-            
-            taxon_data = data['itisTerms'][0]
-            if taxon_data and taxon_data['scientificName'].lower():
-                return taxon_data['scientificName']
-            else:
-                return None
+        """Return the ITIS scientific name equal (case-insensitively) to the cleaned entity_name, else None."""
+        query = self.clean_query(entity_name)
+        url = 'http://www.itis.gov/ITISWebService/jsonservice/getITISTermsFromScientificName?srchKey='
+        try:
+            session = CachedSession(
+                ITIS_CACHE, expire_after=timedelta(days=30), stale_if_error=True)
+            data = session.get(url + query).json()
+        except (OSError, ValueError):
+            # OSError: connection failures, including the exceptions requests raises;
+            # ValueError: UnicodeError and JSON decoding errors.
+            return None
 
-    def clean_query(self, food):
-        cleaned_food = re.sub(r'\s*\b(sp|ssp|af|aff|gen)\.?|\s*[\(\)\-]', '', food.lower()).capitalize().strip()
-        parts = cleaned_food.split()
-        return parts
+        # ITIS may answer with no terms or a null placeholder entry; both mean "no match".
+        itis_terms = data.get('itisTerms') or []
+        taxon_data = itis_terms[0] if itis_terms else None
+        scientific_name = taxon_data.get('scientificName') if taxon_data else None
+        if scientific_name and scientific_name.lower() == query.lower():
+            return scientific_name
+        return None
 
+    def clean_query(self, name):
+        """Return name without rank abbreviations, 1-2 character tokens and punctuation."""
+        cleaned_name = re.sub(
+            r'\b(?:aff|gen|bot|zoo|ssp|subf|exx|indet|subsp|subvar|var|nothovar|group|forma)\b\.?|\b\w{1,2}\b|\s*\W',
+            ' ', name)
+        return ' '.join(cleaned_name.split())  # collapse the space runs the substitutions leave

From 611a4e94f6b72353e5e5622d75a0382a1988c694 Mon Sep 17 00:00:00 2001
From: Sanni Tuomisto <sanni.tuomisto@helsinki.fi>
Date: Fri, 26 Apr 2024 15:37:21 +0300
Subject: [PATCH 15/15] Fix diet import by normalizing NaN fields before diet set lookup

---
 app/imports/importers/diet_importer.py | 44 +++++++++++++-------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/app/imports/importers/diet_importer.py b/app/imports/importers/diet_importer.py
index fed50a8..65b1268 100644
--- a/app/imports/importers/diet_importer.py
+++ b/app/imports/importers/diet_importer.py
@@ -19,7 +19,7 @@ class DietImporter(BaseImporter):
     @transaction.atomic
     def importRow(self, row):
 
-        # Common assignments
+        # Common assignments for diet set
         author = self.get_author(getattr(row, 'author'))
         reference = self.get_or_create_source_reference(
             getattr(row, 'references'), author)
@@ -36,11 +36,22 @@ def importRow(self, row):
             getattr(row, 'measurementMethod'), reference, author)
         time_period = self.get_or_create_time_period(
             (getattr(row, 'samplingEffort')), reference, author)
+        cited_reference=self.possible_nan_to_none(getattr(
+                row,
+                'associatedReferences'))
+        sample_size=self.possible_nan_to_zero(
+                getattr(
+                    row,
+                    'individualCount'))
+        study_time=self.possible_nan_to_none(getattr(
+                row,
+                'verbatimEventDate'))
 
-        # Create source location model
+        # Create source location
         new_source_location = self.get_or_create_source_location(
             getattr(row, 'verbatimLocality'), reference, author)
 
+        # Check choice values
         gender = str(getattr(row, 'sex'))
 
         part_of_organism = str(getattr(row, 'PartOfOrganism'))
@@ -61,44 +72,32 @@ def importRow(self, row):
             if not part_of_organism:
                 part_of_organism = None
 
+        # Create diet set
         obj = DietSet.objects.filter(
             reference=reference,
-            cited_reference=getattr(
-                row,
-                'associatedReferences'),
+            cited_reference=cited_reference,
             taxon=taxon,
             gender=gender,
             location=new_source_location,
-            sample_size=self.possible_nan_to_zero(
-                getattr(
-                    row,
-                    'individualCount')),
+            sample_size=sample_size,
             time_period=time_period,
             method=method,
-            study_time=getattr(
-                row,
-                'verbatimEventDate')).first()
+            study_time=study_time).first()
         if not obj:
             obj = DietSet.objects.create(
                 reference=reference,
-                cited_reference=getattr(
-                    row,
-                    'associatedReferences'),
+                cited_reference=cited_reference,
                 taxon=taxon,
                 gender=gender,
                 location=new_source_location,
-                sample_size=self.possible_nan_to_zero(
-                    getattr(
-                        row,
-                        'individualCount')),
+                sample_size=sample_size,
                 time_period=time_period,
                 method=method,
-                study_time=getattr(
-                    row,
-                    'verbatimEventDate'),
+                study_time=study_time,
                 created_by=author)
             print("Diet set created")
 
+        # Common assignments for diet set item
         verbatim_associated_taxa = str(getattr(row, 'verbatimAssociatedTaxa'))
         if verbatim_associated_taxa == "nan" or verbatim_associated_taxa == "":
             food_item = None
@@ -109,6 +108,7 @@ def importRow(self, row):
         percentage = self.possible_nan_to_zero(
             getattr(row, 'measurementValue'))
 
+        # Create diet set item
         diet_set_item = DietSetItem.objects.filter(
             diet_set=obj, food_item=food_item, percentage=percentage)