Skip to content

Commit

Permalink
Fix duplicated property problems
Browse files Browse the repository at this point in the history
Added duplicated_property_types function in CLI
  • Loading branch information
JosePizarro3 committed Feb 6, 2025
1 parent 66cd1b4 commit c6f078d
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 3 deletions.
19 changes: 19 additions & 0 deletions bam_masterdata/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from openpyxl import Workbook
from rdflib import Graph

from bam_masterdata.cli.duplicated_property_types import duplicated_property_types
from bam_masterdata.cli.entities_to_excel import entities_to_excel
from bam_masterdata.cli.entities_to_json import entities_to_json
from bam_masterdata.cli.entities_to_rdf import entities_to_rdf
Expand Down Expand Up @@ -166,6 +167,12 @@ def export_to_json(force_delete, python_path):

# Process each module using the `model_to_json` method of each entity
for module_path in py_modules:
if module_path.endswith("property_types.py"):
if duplicated_property_types(module_path=module_path, logger=logger):
click.echo(
"Please fix the duplicated property types before exporting to RDF/XML."
)
return
entities_to_json(module_path=module_path, export_dir=export_dir, logger=logger)

click.echo(f"All entity artifacts have been generated and saved to {export_dir}")
Expand Down Expand Up @@ -216,6 +223,12 @@ def export_to_excel(force_delete, python_path):
masterdata_file = os.path.join(export_dir, "masterdata.xlsx")
wb = Workbook()
for i, module_path in enumerate(py_modules):
if module_path.endswith("property_types.py"):
if duplicated_property_types(module_path=module_path, logger=logger):
click.echo(
"Please fix the duplicated property types before exporting to RDF/XML."
)
return
if i == 0:
ws = wb.active
else:
Expand Down Expand Up @@ -280,6 +293,12 @@ def export_to_rdf(force_delete, python_path):
# Process each module using the `model_to_rdf` method of each entity
graph = Graph()
for module_path in py_modules:
if module_path.endswith("property_types.py"):
if duplicated_property_types(module_path=module_path, logger=logger):
click.echo(
"Please fix the duplicated property types before exporting to RDF/XML."
)
return
entities_to_rdf(graph=graph, module_path=module_path, logger=logger)

# Saving RDF/XML to file
Expand Down
34 changes: 34 additions & 0 deletions bam_masterdata/cli/duplicated_property_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import inspect
import re
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from structlog._config import BoundLoggerLazyProxy

from bam_masterdata.utils import import_module


def duplicated_property_types(module_path: str, logger: "BoundLoggerLazyProxy") -> dict:
    """
    Find property type variables that are assigned more than once in a module.

    The module source is scanned line by line for assignments of the form
    `<name> = PropertyTypeDef...`. A name is reported when it is a public member of
    the imported module and such an assignment appears on more than one line. Only
    assignments whose name, `=` and `PropertyTypeDef` are on the same line are detected.

    Args:
        module_path (str): Path of the module to inspect; it is handed to `import_module`
            (presumably the path to a `property_types.py` file -- confirm against callers).
        logger (BoundLoggerLazyProxy): Logger used to report the duplicates at critical level.

    Returns:
        dict: Maps each duplicated variable name to the list of 1-based line numbers
            where it is assigned. Empty when no duplicates are found.
    """
    module = import_module(module_path=module_path)
    source_code = inspect.getsource(module)

    # Scan the source once and collect the line numbers of every candidate assignment,
    # instead of re-splitting and re-scanning the whole source for each module member.
    assignment = re.compile(r"^\s*(\w+) *= *PropertyTypeDef")
    lines_by_name: dict = {}
    for line_number, line in enumerate(source_code.splitlines(), start=1):
        found = assignment.match(line)
        if found:
            lines_by_name.setdefault(found.group(1), []).append(line_number)

    # Only names that are actual (public) members of the module are reported; iterating
    # over `inspect.getmembers` keeps the resulting dictionary in the members' order.
    duplicated_props: dict = {}
    for name, _ in inspect.getmembers(module):
        if name.startswith("_") or name == "PropertyTypeDef":
            continue
        line_numbers = lines_by_name.get(name, [])
        if len(line_numbers) > 1:
            duplicated_props[name] = line_numbers

    if duplicated_props:
        logger.critical(
            f"Found {len(duplicated_props)} duplicated property types. These are stored in a dictionary "
            f"where the keys are the names of the variables in property_types.py and the values are the lines in the module: {duplicated_props}"
        )
    return duplicated_props
6 changes: 4 additions & 2 deletions bam_masterdata/datamodel/property_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3259,7 +3259,8 @@
)


ProductCategory = PropertyTypeDef(
# ! Duplicated variable name for the property type definition (manually fixed)
ProductCategory1 = PropertyTypeDef(
code="PRODUCT_CATEGORY",
description="""Product Category (corresponds to field `Product Category` in the Hazardous Materials Inventory (GSM) of BAM)//Produktkategorie (entspricht Feld `Verwendungstypen/Produktkategorie` aus dem Gefahrstoffmanagement (GSM) der BAM))""",
data_type="CONTROLLEDVOCABULARY",
Expand Down Expand Up @@ -5897,7 +5898,8 @@
)


ProductCategory = PropertyTypeDef(
# ! Duplicated variable name for the property type definition (manually fixed)
ProductCategory2 = PropertyTypeDef(
code="PRODUCT.CATEGORY",
description="""Category""",
data_type="VARCHAR",
Expand Down
5 changes: 4 additions & 1 deletion bam_masterdata/metadata/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,10 @@ def model_id(cls, data: Any) -> Any:
Returns:
Any: The data with the validated fields.
"""
data.id = code_to_class_name(data.code)
if "PropertyType" in data.name:
data.id = code_to_class_name(code=data.code, entity_type="property")
else:
data.id = code_to_class_name(code=data.code, entity_type="object")
return data


Expand Down

1 comment on commit c6f078d

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
File | Stmts | Miss | Cover | Missing
bam_masterdata
   logger.py | 8 | 0 | 100% |
bam_masterdata/cli
   cli.py | 110 | 70 | 36% |
   duplicated_property_types.py | 20 | 14 | 30% |
   entities_to_excel.py | 54 | 3 | 94% |
   entities_to_json.py | 36 | 5 | 86% |
   entities_to_rdf.py | 75 | 64 | 15% |
   fill_masterdata.py | 195 | 181 | 7% |
bam_masterdata/datamodel
   collection_types.py | 37 | 0 | 100% |
   dataset_types.py | 184 | 184 | 0% |
   object_types.py | 1515 | 0 | 100% |
   property_types.py | 800 | 0 | 100% |
   vocabulary_types.py | 13721 | 0 | 100% |
bam_masterdata/excel
   excel_to_entities.py | 191 | 167 | 13% |
bam_masterdata/metadata
   definitions.py | 87 | 0 | 100% |
   entities.py | 88 | 30 | 66% |
bam_masterdata/openbis
   get_entities.py | 53 | 43 | 19% |
   login.py | 6 | 3 | 50% |
bam_masterdata/utils
   utils.py | 66 | 9 | 86% |
TOTAL | 17246 | 773 | 96% |

Tests Skipped Failures Errors Time
79 1 💤 0 ❌ 0 🔥 19.414s ⏱️

Please sign in to comment.