Skip to content

Commit

Permalink
Merge pull request #1 from ODM2/develop
Browse files Browse the repository at this point in the history
PreRelease 0.0.1
  • Loading branch information
ptomasula authored Apr 21, 2022
2 parents 4417cdb + 73d7bfd commit 22493b2
Show file tree
Hide file tree
Showing 21 changed files with 768 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#build files
dist/*
*.egg-info

*.pyc
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# ODM2DataModels

## What is this?
odm2datamodels is a Python package that provides a set of object-relational mapping (ORM) data models for the [Observations Data Model Version 2.1](http://www.odm2.org/). These data models are built on [SQLAlchemy](https://www.sqlalchemy.org/) and provide a convenient way of interfacing with an ODM2.1 database.

## Core Features
The primary entry point is the `ODM2DataModels` class, which, once instantiated, provides access to the set of ORM ODM2.1 data models and to an instance of `ODM2Engine`, which provides utility functions to perform basic Create, Read, Update, and Delete operations, as well as read execution of custom SQL queries constructed using a SQLAlchemy [Select object](https://docs.sqlalchemy.org/en/14/orm/queryguide.html#select-statements) or [Query object](https://docs.sqlalchemy.org/en/14/orm/query.html#sqlalchemy.orm.Query).

## How to install?
Presently, the build files are only available from our [GitHub repository](https://github.com/ODM2/ODM2DataModels).

We are aiming to release to the [Python Package Index (PyPI)](https://pypi.org/) and [Conda](https://docs.conda.io/en/latest/) in the near future.

## Testing and Database Dialect Support
### Testing Method
Presently very limited testing has been conducted and has primarily been through an implementation of a REST API with limited coverage of selected data models. Further expanding and automating testing is an area for future updates.
### Database Dialect Support
These data models have only been validated for a PostgreSQL database running a deployment of the ODM2.1 schema.



3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools>=42"]
build-backend = "setuptools.build_meta"
27 changes: 27 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

[metadata]
name = odm2datamodels
description = "Collection of object-relational mapping (ORM) data models for ODM2"
long_description = file: README.md
long_description_content_type = text/markdown
version = 0.0.1
author = "ODM2 Team"
author_email = ""
url = https://github.com/ODM2/ODM2DataModels
project_urls =
    bugtracker = https://github.com/ODM2/ODM2DataModels/issues
keywords='Observations Data Model ODM2'

[options]
packages = find:
package_dir =
= src

python_requires = >=3.8
install_requires =
sqlalchemy>=1.4.32
pandas>=1.4
geoalchemy2>=0.6.3

[options.packages.find]
where = src
1 change: 1 addition & 0 deletions src/odm2datamodels/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .base import ODM2DataModels as ODM2DataModels
234 changes: 234 additions & 0 deletions src/odm2datamodels/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
import pickle
import warnings
from enum import Enum
from types import ModuleType
from typing import Dict, Union, Any, Type

import geoalchemy2
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.ext.declarative import declared_attr, declarative_base
from sqlalchemy.orm import Query
from sqlalchemy.sql.expression import Select

from .exceptions import ObjectNotFound

from .models import annotations
from .models import auth
from .models import core
from .models import cv
from .models import dataquality
from .models import equipment
from .models import extensionproperties
from .models import externalidentifiers
from .models import labanalyses
from .models import provenance
from .models import results
from .models import samplingfeatures
from .models import simulation


class OutputFormats(Enum):
    """Output formats selectable through the ``output_format`` argument of
    the ``ODM2Engine`` read methods."""

    JSON = "JSON"
    DATAFRAME = "DATAFRAME"
    DICT = "DICT"

class Base():
    """Shared behaviour for every ODM2 data model.

    This class is handed to the SQLAlchemy base factories through their
    ``cls`` argument, so each generated model inherits the dictionary
    helpers defined here.
    """

    @declared_attr
    def __tablename__(cls) -> str:
        """Return the table name: the model's class name, lower-cased."""
        return str(cls.__name__).lower()

    @classmethod
    def from_dict(cls, attributes_dict: Dict[str, Any]) -> object:
        """Alternative constructor that uses a dictionary to populate attributes.

        Args:
            attributes_dict: Mapping of attribute name to value. Keys that
                are not attributes of the model are ignored and empty
                strings are stored as ``None``.

        Returns:
            A new instance of ``cls`` with the matching attributes set.
        """
        instance = cls()
        # Reuse the update path so both entry points share one set of rules.
        instance.update_from_dict(attributes_dict)
        return instance

    def to_dict(self) -> Dict[str, Any]:
        """Return a ``{column key: current value}`` dictionary for this instance."""
        return {
            column: getattr(self, column)
            for column in self.__table__.columns.keys()
        }

    def update_from_dict(self, attributes_dict: Dict[str, Any]) -> None:
        """Update instance attributes from the provided dictionary.

        Args:
            attributes_dict: Mapping of attribute name to new value. Keys
                that are not attributes of the instance are skipped; an
                empty string is converted to ``None`` before assignment.
        """
        for key, value in attributes_dict.items():
            if not hasattr(self, key):
                continue
            if value == '':
                value = None
            setattr(self, key, value)

    @classmethod
    def get_pkey_name(cls) -> Union[str, None]:
        """Return the name of the first primary-key column, or ``None`` if
        the table declares no primary key."""
        return next(
            (column.name for column in cls.__table__.columns if column.primary_key),
            None,
        )

class ODM2Engine:
    """Create/read/update/delete helpers plus read-only query execution.

    Every method opens its own short-lived session from the session factory
    supplied at construction time.
    """

    def __init__(self, session_maker: sqlalchemy.orm.sessionmaker) -> None:
        """Store the session factory used by all operations."""
        self.session_maker = session_maker

    @staticmethod
    def _get_or_raise(session, model: Type[Base], pkey: Union[int, str]) -> Base:
        """Fetch ``model`` by primary key or raise ``ObjectNotFound``."""
        obj = session.get(model, pkey)
        if obj is None:
            pkey_name = model.get_pkey_name()
            raise ObjectNotFound(f"No '{model.__name__}' object found with {pkey_name} = {pkey}")
        return obj

    def read_query(self,
                   query: Union[Query, Select],
                   output_format: OutputFormats = OutputFormats.JSON,
                   orient: str = 'records') -> Union[str, pd.DataFrame, Dict[Any, Any]]:
        """Execute a read query and return the rows in the requested format.

        Args:
            query: A ``Select`` statement, or a ``Query`` whose ``statement``
                attribute is executed.
            output_format: Desired output representation.
            orient: Orientation passed to ``DataFrame.to_json`` for JSON
                output. It is not applied to DICT output, which uses the
                default orientation of ``DataFrame.to_dict()``.

        Returns:
            A JSON string, a DataFrame, or a dictionary depending on
            ``output_format``.

        Raises:
            TypeError: If ``output_format`` is not a supported member.
        """
        statement = query if isinstance(query, Select) else query.statement
        with self.session_maker() as session:
            df = pd.read_sql(statement, session.bind)

        if output_format == OutputFormats.JSON:
            return df.to_json(orient=orient)
        if output_format == OutputFormats.DATAFRAME:
            return df
        if output_format == OutputFormats.DICT:
            return df.to_dict()
        raise TypeError("Unknown output format")

    def insert_query(self) -> None:
        """Placeholder for bulk insert; always raises ``NotImplementedError``."""
        # Planned design (not implemented yet):
        #   accept dataframe & model
        #   use pandas to_sql method to perform insert
        #   if except return false or maybe raise error
        #   else return true
        raise NotImplementedError

    def create_object(self, obj: object) -> Union[int, str]:
        """Persist ``obj`` and return its primary-key value after commit.

        Any primary-key value already set on ``obj`` is cleared before the
        insert.
        """
        pkey_name = obj.get_pkey_name()
        # Presumably cleared so the database assigns the key; confirm.
        setattr(obj, pkey_name, None)

        with self.session_maker() as session:
            session.add(obj)
            session.commit()
            # Read the key before the session closes so the committed value
            # can still be loaded from the database.
            return getattr(obj, pkey_name)

    def read_object(self, model: Type[Base], pkey: Union[int, str],
                    output_format: OutputFormats = OutputFormats.DICT,
                    orient: str = 'records') -> Union[Dict[str, Any], pd.DataFrame, str]:
        """Fetch a single object by primary key.

        Args:
            model: Model class to look up.
            pkey: Primary-key value of the wanted row.
            output_format: DICT returns the ``to_dict()`` mapping, DATAFRAME
                a one-row DataFrame, JSON that DataFrame serialized with
                ``orient``.
            orient: Orientation passed to ``DataFrame.to_json``.

        Raises:
            ObjectNotFound: If no row has the given primary key.
            TypeError: If ``output_format`` is not a supported member.
        """
        with self.session_maker() as session:
            obj = self._get_or_raise(session, model, pkey)
            # Copy the values out while the object is still attached to the
            # session. No commit is issued: nothing was modified, and a
            # commit would only expire the attributes about to be read.
            obj_dict = obj.to_dict()

        if output_format == OutputFormats.DICT:
            return obj_dict

        # Wrap every value in a list so the mapping always becomes exactly
        # one DataFrame row, whatever types the values have.
        obj_df = pd.DataFrame.from_dict({key: [value] for key, value in obj_dict.items()})
        if output_format == OutputFormats.DATAFRAME:
            return obj_df
        if output_format == OutputFormats.JSON:
            return obj_df.to_json(orient=orient)
        raise TypeError("Unknown output format")

    def update_object(self, model: Type[Base], pkey: Union[int, str], data: Dict[str, Any]) -> None:
        """Apply ``data`` to the object identified by ``pkey`` and commit.

        Args:
            model: Model class to update.
            pkey: Primary-key value of the row to update.
            data: New attribute values. A non-dict argument is converted by
                calling its ``dict()`` method. Any entry for the primary-key
                column is ignored, and the caller's mapping is left
                unmodified.

        Raises:
            ObjectNotFound: If no row has the given primary key.
        """
        if not isinstance(data, dict):
            data = data.dict()
        pkey_name = model.get_pkey_name()
        # Filter into a new dict rather than popping from the caller's one.
        changes = {key: value for key, value in data.items() if key != pkey_name}

        with self.session_maker() as session:
            obj = self._get_or_raise(session, model, pkey)
            obj.update_from_dict(changes)
            session.commit()

    def delete_object(self, model: Type[Base], pkey: Union[int, str]) -> None:
        """Delete the object identified by ``pkey`` and commit.

        Raises:
            ObjectNotFound: If no row has the given primary key.
        """
        with self.session_maker() as session:
            obj = self._get_or_raise(session, model, pkey)
            session.delete(obj)
            session.commit()

class Models:
    """Namespace holding one mapped model class per ODM2 stub class.

    For each class found in the schema modules, a new class with the same
    name is created that inherits from ``base_model`` and is stored as an
    attribute of this object.
    """

    def __init__(self, base_model) -> None:
        """Build the extended models for every schema module.

        Args:
            base_model: Class used as the sole base of each generated model.
        """
        self._base_model = base_model
        schema_modules = (
            annotations,
            auth,
            core,
            cv,
            dataquality,
            equipment,
            extensionproperties,
            externalidentifiers,
            labanalyses,
            provenance,
            results,
            samplingfeatures,
            simulation,
        )
        for schema in schema_modules:
            self._process_schema(schema)

    def _process_schema(self, schema: ModuleType) -> None:
        """Create an extended model for every class exposed by ``schema``.

        Non-dunder module attributes that are not classes (constants,
        imported modules, functions) are skipped.
        """
        bases = (self._base_model,)
        for class_name in dir(schema):
            if class_name.startswith('__'):
                continue
            model = getattr(schema, class_name)
            if not isinstance(model, type):
                continue
            model_attribs = self._trim_dunders(dict(model.__dict__))
            setattr(self, class_name, type(class_name, bases, model_attribs))

    def _trim_dunders(self, dictionary: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``dictionary`` without keys starting with ``__``."""
        return {k: v for k, v in dictionary.items() if not k.startswith('__')}

class ODM2DataModels():
    """Entry point exposing the ODM2 data models and an ``ODM2Engine``.

    Attributes:
        models: ``Models`` namespace holding the generated model classes.
        odm2_engine: ``ODM2Engine`` bound to a session factory for ``engine``.
    """

    def __init__(self, engine: sqlalchemy.engine.Engine, schema: str = 'odm2',
                 cache_path: Union[str, None] = None) -> None:
        """Build the models, from cached metadata when available.

        Args:
            engine: SQLAlchemy engine for the target database.
            schema: Database schema name given to the reflected metadata.
            cache_path: Optional path of a pickle file holding previously
                reflected metadata. When the file is missing, the metadata
                is reflected and written to this path. ``None`` disables
                caching.
        """
        self._schema = schema
        self._cache_path = cache_path

        self._engine = engine
        self._session = sqlalchemy.orm.sessionmaker(self._engine)
        self._cached = False
        self.odm2_engine: ODM2Engine = ODM2Engine(self._session)

        self._model_base = self._prepare_model_base()
        self.models = Models(self._model_base)
        if not self._cached:
            self._prepare_automap_models()

    def _prepare_model_base(self):
        """Return the base class the models are built on.

        A declarative base is built from pickled metadata when a cache file
        can be read; otherwise an automap base with empty metadata is
        returned. ``self._cached`` records which path was taken.
        """
        # A missing or empty cache path would make ``open`` raise TypeError
        # rather than FileNotFoundError, so it is checked explicitly.
        if self._cache_path:
            try:
                with open(self._cache_path, 'rb') as file:
                    # NOTE(security): unpickling can execute arbitrary code;
                    # the cache file must come from a trusted location.
                    metadata = pickle.load(file=file)
            except FileNotFoundError:
                pass
            else:
                self._cached = True
                # NOTE(review): ``bind`` looks deprecated in SQLAlchemy 1.4;
                # confirm before moving to 2.x.
                return declarative_base(cls=Base, bind=self._engine, metadata=metadata)

        metadata = sqlalchemy.MetaData(schema=self._schema)
        self._cached = False
        return automap_base(cls=Base, metadata=metadata)

    def _prepare_automap_models(self):
        """Prepare the automap base against the engine and cache its metadata.

        Caching is best effort: a failure to write the cache file produces a
        ``RuntimeWarning`` instead of an exception.
        """
        self._model_base.prepare(self._engine)
        if not self._cache_path:
            return
        try:
            with open(self._cache_path, 'wb') as file:
                pickle.dump(self._model_base.metadata, file)
        except OSError:
            # Covers a missing directory (FileNotFoundError) as well as
            # permission and other filesystem errors.
            warnings.warn('Unable to cache models which may lead to degraded performance.', RuntimeWarning)
5 changes: 5 additions & 0 deletions src/odm2datamodels/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class ObjectNotFound(Exception):
    """Raised when a requested object cannot be found.

    Attributes:
        message: Human-readable description of the failed lookup.
    """

    def __init__(self, message: str) -> None:
        """Store ``message`` and pass it to ``Exception``.

        Passing it on means ``str(exc)``, ``exc.args`` and tracebacks all
        show the message.
        """
        self.message = message
        super().__init__(message)
13 changes: 13 additions & 0 deletions src/odm2datamodels/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from . import annotations
from . import auth
from . import core
from . import cv
from . import dataquality
from . import equipment
from . import extensionproperties
from . import externalidentifiers
from . import labanalyses
from . import provenance
from . import results
from . import samplingfeatures
from . import simulation
45 changes: 45 additions & 0 deletions src/odm2datamodels/models/annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Data models corresponding to the tables under the ODM2Annotations schema
Reference: http://odm2.github.io/ODM2/schemas/ODM2_Current/schemas/ODM2Annotations.html
"""

class ActionAnnotations:
    """Model stub for the ODM2Annotations ``ActionAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_ActionAnnotations.html
    """

class Annotations:
    """Model stub for the ODM2Annotations ``Annotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_Annotations.html
    """

class CategoricalResultValueAnnotations:
    """Model stub for the ODM2Annotations ``CategoricalResultValueAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_CategoricalResultValueAnnotations.html
    """

class EquipmentAnnotations:
    """Model stub for the ODM2Annotations ``EquipmentAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_EquipmentAnnotations.html
    """

class MethodAnnotations:
    """Model stub for the ODM2Annotations ``MethodAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_MethodAnnotations.html
    """

class PointCoverageResultValueAnnotations:
    """Model stub for the ODM2Annotations ``PointCoverageResultValueAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_PointCoverageResultValueAnnotations.html
    """

class ProfileResultValueAnnotations:
    """Model stub for the ODM2Annotations ``ProfileResultValueAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_ProfileResultValueAnnotations.html
    """

class ResultAnnotations:
    """Model stub for the ODM2Annotations ``ResultAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_ResultAnnotations.html
    """

class SamplingFeatureAnnotations:
    """Model stub for the ODM2Annotations ``SamplingFeatureAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_SamplingFeatureAnnotations.html
    """

class SectionResultValueAnnotations:
    """Model stub for the ODM2Annotations ``SectionResultValueAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_SectionResultValueAnnotations.html
    """

class SpectraResultValueAnnotations:
    """Model stub for the ODM2Annotations ``SpectraResultValueAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_SpectraResultValueAnnotations.html
    """

class TimeSeriesResultValueAnnotations:
    """Model stub for the ODM2Annotations ``TimeSeriesResultValueAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_TimeSeriesResultValueAnnotations.html
    """

class TrajectoryResultValueAnnotations:
    """Model stub for the ODM2Annotations ``TrajectoryResultValueAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_TrajectoryResultValueAnnotations.html
    """

class TransectResultValueAnnotations:
    """Model stub for the ODM2Annotations ``TransectResultValueAnnotations`` table.

    Reference:
    http://odm2.github.io/ODM2/schemas/ODM2_Current/tables/ODM2Annotations_TransectResultValueAnnotations.html
    """
24 changes: 24 additions & 0 deletions src/odm2datamodels/models/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""
"""

class Accounts:
    """Attribute-less model stub named ``Accounts``.

    NOTE(review): no reference documentation link is recorded for this model.
    """

#PRT - even though this is a CV table it is for account auth so we might move this to the auth module.
class CV_Permission:
    """Attribute-less model stub named ``CV_Permission``.

    NOTE(review): no reference documentation link is recorded for this model.
    """

class OrganizationsPermissions:
    """Attribute-less model stub named ``OrganizationsPermissions``.

    NOTE(review): no reference documentation link is recorded for this model.
    """

class OrganizationsSamplingFeatures:
    """Attribute-less model stub named ``OrganizationsSamplingFeatures``.

    NOTE(review): no reference documentation link is recorded for this model.
    """

class ResultsPermissions:
    """Attribute-less model stub named ``ResultsPermissions``.

    NOTE(review): no reference documentation link is recorded for this model.
    """

class Roles:
    """Attribute-less model stub named ``Roles``.

    NOTE(review): no reference documentation link is recorded for this model.
    """

class SamplingFeaturesPermissions:
    """Attribute-less model stub named ``SamplingFeaturesPermissions``.

    NOTE(review): no reference documentation link is recorded for this model.
    """
Loading

0 comments on commit 22493b2

Please sign in to comment.