Merge pull request #1 from ODM2/develop

PreRelease 0.0.1
ODM2 · Apr 21, 2022 · 22493b2 · 22493b2
2 parents 4417cdb + 73d7bfd
commit 22493b2
Show file tree

Hide file tree

Showing 21 changed files with 768 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+#build files
+dist/*
+*.egg-info
+
+*.pyc
diff --git a/README.md b/README.md
@@ -0,0 +1,21 @@
+# ODM2DataModels
+
+## What is this?
+odm2datamodels is a Python package that provides a set of object-relational mapping (ORM) data models for the [Observations Data Model Version 2.1](http://www.odm2.org/). These data models are built on top of [SQLAlchemy](https://www.sqlalchemy.org/) and provide a convenient way of interfacing with an ODM2.1 database.
+
+## Core Features
+The primary feature is the `ODM2DataModels` class which, once instantiated, provides access to the set of ORM ODM2.1 data models and to an instance of `ODM2Engine`, which provides utility functions to perform basic Create, Read, Update, and Delete operations as well as read execution of custom SQL queries constructed using a SQLAlchemy [Select object](https://docs.sqlalchemy.org/en/14/orm/queryguide.html#select-statements) or [Query object](https://docs.sqlalchemy.org/en/14/orm/query.html#sqlalchemy.orm.Query).
+
+## How to install?
+Presently the build files are only available on our [GitHub repository](https://github.com/ODM2/ODM2DataModels).
+
+However, we are aiming to release to the [Python Package Index (PyPI)](https://pypi.org/) and [Conda](https://docs.conda.io/en/latest/) in the near future.
+
+## Testing and Database Dialect Support
+### Testing Method
+Presently only very limited testing has been conducted, primarily through an implementation of a REST API with limited coverage of selected data models. Further expanding and automating testing is an area for future updates.
+### Database Dialect Support
+These data models have only been validated for a PostgreSQL database running a deployment of the ODM2.1 schema. 
+
+
+
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools>=42"]
+build-backend = "setuptools.build_meta"
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,27 @@
+
+[metadata]
+name = odm2datamodels
+description = Collection of object-relational mapping (ORM) data models for ODM2
+long_description = file: README.md
+long_description_content_type = text/markdown
+version = 0.0.1
+author = ODM2 Team
+author_email = ""
+url = https://github.com/ODM2/ODM2DataModels
+project_urls =
+    bugtracker = https://github.com/ODM2/ODM2DataModels/issues
+keywords = Observations Data Model ODM2
+
+[options]
+packages = find:
+package_dir =
+    = src
+
+python_requires = >=3.8
+install_requires = 
+    sqlalchemy>=1.4.32
+    pandas>=1.4
+    geoalchemy2>=0.6.3
+
+[options.packages.find]
+where = src
diff --git a/src/odm2datamodels/__init__.py b/src/odm2datamodels/__init__.py
@@ -0,0 +1 @@
+from .base import ODM2DataModels as ODM2DataModels
diff --git a/src/odm2datamodels/base.py b/src/odm2datamodels/base.py
@@ -0,0 +1,234 @@
+import sqlalchemy
+from sqlalchemy.sql.expression import Select
+from sqlalchemy.orm import Query    
+from sqlalchemy.ext.automap import automap_base
+from sqlalchemy.ext.declarative import declared_attr, declarative_base
+import geoalchemy2
+
+import pickle
+from enum import Enum
+from typing import Dict, Union, Any, Type
+import warnings
+
+import pandas as pd
+
+from .exceptions import ObjectNotFound
+
+from .models import annotations
+from .models import auth
+from .models import core
+from .models import cv
+from .models import dataquality
+from .models import equipment
+from .models import extensionproperties
+from .models import externalidentifiers
+from .models import labanalyses
+from .models import provenance
+from .models import results
+from .models import samplingfeatures
+from .models import simulation
+
+
+class OutputFormats(Enum):
+    """Output formats a caller can request from the ODM2Engine read methods."""
+
+    JSON = 'JSON'
+    DATAFRAME = 'DATAFRAME'
+    DICT = 'DICT'
+
+class Base():
+    """Mixin passed as ``cls=`` to the declarative/automap base.
+
+    Supplies the table-name rule plus dictionary conversion helpers that every
+    generated model class inherits.
+    """
+
+    @declared_attr
+    def __tablename__(self) -> str:
+        """Return the lower-cased class name as the table name.
+
+        ``self`` is expected to be the class being mapped (hence ``__name__``).
+        """
+        cls_name = str(self.__name__)
+        return cls_name.lower()
+
+    @classmethod
+    def from_dict(cls, attributes_dict:Dict) -> 'Base':
+        """Alternative constructor that uses dictionary to populate attributes.
+
+        Keys that are not attributes of the model are ignored and empty
+        strings are stored as ``None`` (same rules as ``update_from_dict``).
+        """
+        instance = cls()
+        instance.update_from_dict(attributes_dict)
+        return instance
+
+    def to_dict(self) -> Dict[str,Any]:
+        """Converts the mapped column attributes into a dictionary keyed by column key."""
+        return {column: getattr(self, column) for column in self.__table__.columns.keys()}
+
+    def update_from_dict(self, attributes_dict:Dict[str, Any]) -> None:
+        """Updates instance attributes based on provided dictionary.
+
+        Only keys that already exist as attributes on the instance are applied;
+        an empty-string value is converted to ``None`` before assignment.
+        """
+        for key, value in attributes_dict.items():
+            if not hasattr(self, key):
+                continue
+            if value == '':
+                value = None
+            setattr(self, key, value)
+
+    @classmethod
+    def get_pkey_name(cls) -> Union[str,None]:
+        """Returns the name of the first primary-key column, or None if there is none."""
+        return next((column.name for column in cls.__table__.columns if column.primary_key), None)
+
+class ODM2Engine:
+    """CRUD and read-query helper built around a SQLAlchemy ``sessionmaker``.
+
+    Each public method opens its own session from ``session_maker`` in a
+    ``with`` block, so no session outlives a single call.
+    """
+
+    def __init__(self, session_maker:sqlalchemy.orm.sessionmaker) -> None:
+        """Store the session factory used by every other method."""
+        self.session_maker = session_maker
+
+    @staticmethod
+    def _get_or_raise(session, model:Type[Base], pkey:Union[int, str]) -> Base:
+        """Return the ``model`` instance with primary key ``pkey`` or raise ObjectNotFound."""
+        obj = session.get(model, pkey)
+        if obj is None:
+            pkey_name = model.get_pkey_name()
+            raise ObjectNotFound(f"No '{model.__name__}' object found with {pkey_name} = {pkey}")
+        return obj
+
+    def read_query(self,
+            query: Union[Query, Select],
+            output_format:OutputFormats=OutputFormats.JSON,
+            orient:str='records') -> Union[str, pd.DataFrame, Dict[Any, Any]]:
+        """Execute a read query and return the result in the requested format.
+
+        Args:
+            query: a ``Select`` (executed as-is) or a ``Query`` (its
+                ``.statement`` is executed).
+            output_format: JSON string, DataFrame, or dict.
+            orient: forwarded to ``DataFrame.to_json``; only used for JSON output.
+
+        Raises:
+            TypeError: if ``output_format`` is not a known ``OutputFormats`` member.
+        """
+        statement = query if isinstance(query, Select) else query.statement
+        with self.session_maker() as session:
+            df = pd.read_sql(statement, session.bind)
+
+        if output_format == OutputFormats.JSON:
+            return df.to_json(orient=orient)
+        if output_format == OutputFormats.DATAFRAME:
+            return df
+        if output_format == OutputFormats.DICT:
+            # `orient` is intentionally not forwarded: existing callers receive
+            # pandas' default column-oriented dict.
+            return df.to_dict()
+        raise TypeError("Unknown output format")
+
+    def insert_query(self) -> None:
+        """Placeholder for bulk insert"""
+        #accept dataframe & model
+        #use pandas to_sql method to perform insert
+        #if except return false or maybe raise error 
+        #else return true
+        raise NotImplementedError
+
+    def create_object(self, obj:object) -> Union[int, str]:
+        """Persist ``obj`` and return its primary-key value.
+
+        Any primary-key value already set on ``obj`` is cleared before the
+        insert (presumably so the database assigns it - confirm for tables
+        without a generated key). The key is read back while the session is
+        still open.
+        """
+        pkey_name = obj.get_pkey_name()
+        setattr(obj, pkey_name, None)
+
+        with self.session_maker() as session:
+            session.add(obj)
+            session.commit()
+            return getattr(obj, pkey_name)
+
+    def read_object(self, model:Type[Base], pkey:Union[int, str], 
+            output_format: OutputFormats=OutputFormats.DICT, 
+            orient:str='records') -> Union[Dict[str, Any], pd.DataFrame, str]:
+        """Fetch one object by primary key and return it in the requested format.
+
+        Args:
+            model: model class to look up.
+            pkey: primary-key value.
+            output_format: DICT returns ``obj.to_dict()``; DATAFRAME and JSON
+                return a single-row frame (or its JSON form).
+            orient: forwarded to ``DataFrame.to_json``; only used for JSON output.
+
+        Raises:
+            ObjectNotFound: if no row has that primary key.
+            TypeError: if ``output_format`` is not a known ``OutputFormats`` member.
+        """
+        with self.session_maker() as session:
+            # No commit here: this is a pure read, and committing would expire
+            # the object and force a second SELECT when it is converted below.
+            obj = self._get_or_raise(session, model, pkey)
+            obj_dict = obj.to_dict()
+
+        if output_format == OutputFormats.DICT:
+            return obj_dict
+
+        # One object is always one row, so every value is wrapped in a
+        # one-element list regardless of the type of the first column.
+        obj_df = pd.DataFrame.from_dict({key: [value] for key, value in obj_dict.items()})
+        if output_format == OutputFormats.DATAFRAME:
+            return obj_df
+        if output_format == OutputFormats.JSON:
+            return obj_df.to_json(orient=orient)
+        raise TypeError("Unknown output format")
+
+
+    def update_object(self, model:Type[Base], pkey:Union[int,str], data:Dict[str, Any]) -> None:
+        """Apply ``data`` to the object identified by ``pkey`` and commit.
+
+        ``data`` may be a dict or any object exposing ``.dict()``. The
+        primary-key entry, if present, is ignored; the caller's dict is not
+        modified.
+
+        Raises:
+            ObjectNotFound: if no row has that primary key.
+        """
+        if not isinstance(data, dict):
+            data = data.dict()
+        pkey_name = model.get_pkey_name()
+        # Build a filtered copy instead of popping from the caller's dict.
+        changes = {key: value for key, value in data.items() if key != pkey_name}
+        with self.session_maker() as session:
+            obj = self._get_or_raise(session, model, pkey)
+            obj.update_from_dict(changes)
+            session.commit()
+
+    def delete_object(self, model:Type[Base], pkey:Union[int, str]) -> None:
+        """Delete the object identified by ``pkey`` and commit.
+
+        Raises:
+            ObjectNotFound: if no row has that primary key.
+        """
+        with self.session_maker() as session:
+            obj = self._get_or_raise(session, model, pkey)
+            session.delete(obj)
+            session.commit()
+
+class Models:
+    """Namespace holding one generated model class per stub class.
+
+    Every class found in the model modules is rebuilt as a subclass of
+    ``base_model`` and stored as an attribute named after the class.
+    """
+
+    def __init__(self, base_model) -> None:
+        """Generate model classes from all schema modules on top of ``base_model``."""
+        self._base_model = base_model
+        # Order is preserved from the original sequence of calls: if two
+        # modules define the same class name, the later module wins.
+        schemas = (
+            annotations,
+            auth,
+            core,
+            cv,
+            dataquality,
+            equipment,
+            extensionproperties,
+            externalidentifiers,
+            labanalyses,
+            provenance,
+            results,
+            samplingfeatures,
+            simulation,
+        )
+        for schema in schemas:
+            self._process_schema(schema)
+
+    def _process_schema(self, schema:Any) -> None:
+        """Create a ``base_model`` subclass for each class defined on ``schema``.
+
+        ``schema`` is a module object (not a string). Names starting with a
+        double underscore and attributes that are not classes are skipped, so
+        a constant or function in a schema module cannot break model creation.
+        """
+        bases = (self._base_model,)
+        for class_name in dir(schema):
+            if class_name.startswith('__'):
+                continue
+            model = getattr(schema, class_name)
+            if not isinstance(model, type):
+                continue
+            model_attribs = self._trim_dunders(dict(model.__dict__))
+            extended_model = type(class_name, bases, model_attribs)
+            setattr(self, class_name, extended_model)
+
+    def _trim_dunders(self, dictionary:Dict[str, Any]) -> Dict[str, Any]:
+        """Return a copy of ``dictionary`` without keys that start with ``__``."""
+        return {k: v for k, v in dictionary.items() if not k.startswith('__')}
+
+class ODM2DataModels():
+    """Entry point that builds the ODM2 model classes for a database engine.
+
+    Attributes:
+        odm2_engine: ``ODM2Engine`` bound to a sessionmaker for ``engine``.
+        models: ``Models`` namespace holding the generated model classes.
+    """
+
+    def __init__(self, engine:sqlalchemy.engine.Engine, schema:str='odm2', cache_path:Union[str, None]=None) -> None:
+        """Build the model base and model classes.
+
+        Args:
+            engine: SQLAlchemy engine for the target database.
+            schema: schema name given to the ``MetaData`` used for reflection.
+            cache_path: optional path of a pickled ``MetaData`` cache. When the
+                file exists it is loaded instead of reflecting; otherwise the
+                database is reflected and, if a path was given, the cache is
+                written. ``None`` disables caching.
+        """
+
+        self._schema = schema
+        self._cache_path = cache_path
+
+        self._engine = engine
+        self._session = sqlalchemy.orm.sessionmaker(self._engine)
+        self._cached= False
+        self.odm2_engine: ODM2Engine = ODM2Engine(self._session)
+
+        self._model_base = self._prepare_model_base()
+        self.models = Models(self._model_base)
+        if not self._cached:
+            self._prepare_automap_models()
+
+    def _prepare_model_base(self):
+        """Return a declarative base from cached metadata, else an automap base.
+
+        Sets ``self._cached`` to reflect which path was taken.
+        """
+        # Only try the cache when a path was supplied: open(None) raises
+        # TypeError, which previously escaped and broke the default constructor.
+        if self._cache_path:
+            try:
+                with open(self._cache_path, 'rb') as file:
+                    # SECURITY: unpickling can execute arbitrary code; the cache
+                    # file must come from a trusted location.
+                    metadata = pickle.load(file=file)
+            except FileNotFoundError:
+                pass  # no cache yet; fall through to reflection
+            else:
+                self._cached = True
+                return declarative_base(cls=Base, bind=self._engine, metadata=metadata)
+
+        metadata = sqlalchemy.MetaData(schema=self._schema)
+        self._cached = False
+        return automap_base(cls=Base, metadata=metadata)
+
+    def _prepare_automap_models(self):
+        """Reflect the database into the automap base and write the cache if configured."""
+        self._model_base.prepare(self._engine)
+        if not self._cache_path: return
+        try:
+            with open(self._cache_path, 'wb') as file:
+                pickle.dump(self._model_base.metadata, file)
+        except OSError:
+            # Caching is best-effort: a missing directory, permission problem or
+            # similar must not prevent the freshly reflected models being used.
+            warnings.warn('Unable to cache models which may lead to degraded performance.', RuntimeWarning)
diff --git a/src/odm2datamodels/exceptions.py b/src/odm2datamodels/exceptions.py
@@ -0,0 +1,5 @@
+class ObjectNotFound(Exception):
+    """Raised when no object exists for the requested model and primary key."""
+
+    def __init__(self, message:str) -> None:
+        """Store ``message`` and pass it to ``Exception`` so ``str(exc)`` shows it."""
+        self.message = message
+        super().__init__(message)
diff --git a/src/odm2datamodels/models/__init__.py b/src/odm2datamodels/models/__init__.py
@@ -0,0 +1,13 @@
+from . import annotations
+from . import auth
+from . import core
+from . import cv
+from . import dataquality
+from . import equipment
+from . import extensionproperties
+from . import externalidentifiers
+from . import labanalyses
+from . import provenance
+from . import results
+from . import samplingfeatures
+from . import simulation
diff --git a/src/odm2datamodels/models/annotations.py b/src/odm2datamodels/models/annotations.py
@@ -0,0 +1,45 @@
+"""Data models corresponding to the tables under the ODM2Annotations schema
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/schemas/ODM2Annotations.html
+"""
+
+class ActionAnnotations():
+	"""Stub model for the ODM2Annotations ActionAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_ActionAnnotations.html
+	"""
+
+class Annotations():
+	"""Stub model for the ODM2Annotations Annotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_Annotations.html
+	"""
+
+class CategoricalResultValueAnnotations():
+	"""Stub model for the ODM2Annotations CategoricalResultValueAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_CategoricalResultValueAnnotations.html
+	"""
+
+class EquipmentAnnotations():
+	"""Stub model for the ODM2Annotations EquipmentAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_EquipmentAnnotations.html
+	"""
+
+class MethodAnnotations():
+	"""Stub model for the ODM2Annotations MethodAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_MethodAnnotations.html
+	"""
+
+class PointCoverageResultValueAnnotations():
+	"""Stub model for the ODM2Annotations PointCoverageResultValueAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_PointCoverageEesultValueAnnotations.html
+
+	NOTE(review): "Eesult" in the URL above looks like a typo for "Result" - confirm which link resolves.
+	"""
+
+class ProfileResultValueAnnotations():
+	"""Stub model for the ODM2Annotations ProfileResultValueAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_ProfileResultValueAnnotations.html
+	"""
+
+class ResultAnnotations():
+	"""Stub model for the ODM2Annotations ResultAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_ResultAnnotations.html
+	"""
+
+class SamplingFeatureAnnotations():
+	"""Stub model for the ODM2Annotations SamplingFeatureAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_SamplingFeatureAnnotations.html
+	"""
+
+class SectionResultValueAnnotations():
+	"""Stub model for the ODM2Annotations SectionResultValueAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_SectionResultValueAnnotations.html
+	"""
+
+class SpectraResultValueAnnotations():
+	"""Stub model for the ODM2Annotations SpectraResultValueAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_SpectraResultValueAnnotations.html
+	"""
+
+class TimeSeriesResultValueAnnotations():
+	"""Stub model for the ODM2Annotations TimeSeriesResultValueAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_TimeSeriesResultValueAnnotations.html
+	"""
+
+class TrajectoryResultValueAnnotations():
+	"""Stub model for the ODM2Annotations TrajectoryResultValueAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_TrajectoryResultValueAnnotations.html
+	"""
+
+class TransectResultValueAnnotations():
+	"""Stub model for the ODM2Annotations TransectResultValueAnnotations table.
+
+	Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_TransectResultValueAnnotations.html
+	"""
diff --git a/src/odm2datamodels/models/auth.py b/src/odm2datamodels/models/auth.py
@@ -0,0 +1,24 @@
+"""
+"""
+
+class Accounts():
+    """Placeholder model class; declares no attributes of its own.
+
+    Presumably corresponds to an accounts table - TODO confirm against the database schema.
+    """
+
+#PRT - even though this is a CV table it is for account auth, which is why it is defined in this auth module.
+# NOTE(review): the earlier wording said this "might" be moved to the auth module; it is already here -
+# confirm it is not also declared in the cv module.
+class CV_Permission():
+	"""Placeholder model class for the permission controlled-vocabulary (CV) table used for account auth."""
+
+class OrganizationsPermissions():
+    """Placeholder model class; declares no attributes of its own.
+
+    Presumably links organizations to permissions - TODO confirm against the database schema.
+    """
+
+class OrganizationsSamplingFeatures():
+    """Placeholder model class; declares no attributes of its own.
+
+    Presumably links organizations to sampling features - TODO confirm against the database schema.
+    """
+
+class ResultsPermissions():
+    """Placeholder model class; declares no attributes of its own.
+
+    Presumably links results to permissions - TODO confirm against the database schema.
+    """
+
+class Roles():
+    """Placeholder model class; declares no attributes of its own.
+
+    Presumably corresponds to a roles table - TODO confirm against the database schema.
+    """
+
+class SamplingFeaturesPermissions():
+    """Placeholder model class; declares no attributes of its own.
+
+    Presumably links sampling features to permissions - TODO confirm against the database schema.
+    """
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .base import ODM2DataModels as ODM2DataModels