Skip to content

Commit

Permalink
Merge pull request #81 from I-GUIDE/80-mediaobject-additional-attributes
Browse files Browse the repository at this point in the history
Additional attributes for MediaObject schema model
  • Loading branch information
pkdash authored Apr 17, 2024
2 parents b76d4bf + a84ad13 commit 2952217
Show file tree
Hide file tree
Showing 7 changed files with 221 additions and 8 deletions.
4 changes: 0 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ down:
build:
docker-compose build

.PHONY: test
test:
docker-compose exec api pytest tests

.PHONY: format
format:
docker-compose run api $(isort)
Expand Down
1 change: 1 addition & 0 deletions api/adapters/hydroshare.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def to_dataset_media_object(self):
media_object.encodingFormat = self.content_type
media_object.contentSize = f"{self.size/1000.00} KB"
media_object.name = self.file_name
media_object.sha256 = self.checksum
return media_object


Expand Down
21 changes: 21 additions & 0 deletions api/models/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,13 @@ class IsPartOf(CreativeWork):
)


# Describes a related metadata document that a media object is part of.
# Extends CreativeWork, overriding the ``url`` and ``description`` fields with
# titles/descriptions that are specific to a related metadata document.
# NOTE(review): documented with '#' comments rather than a class docstring,
# since a docstring may be emitted as the model "description" in the generated
# JSON schema -- confirm before converting this to a docstring.
class MediaObjectPartOf(CreativeWork):
    # Address of the related metadata document; typed as Optional.
    url: Optional[HttpUrl] = Field(
        title="URL",
        description="The URL address to the related metadata document.",
    )
    # Free-text information about the related metadata document; typed as Optional.
    description: Optional[str] = Field(description="Information about a related metadata document.")


class SubjectOf(CreativeWork):
url: Optional[HttpUrl] = Field(
title="URL",
Expand Down Expand Up @@ -410,6 +417,11 @@ class MediaObject(SchemaBaseModel):
title="Source organization",
description="The organization that provided the media object."
)
sha256: Optional[str] = Field(title="SHA-256", description="The SHA-256 hash of the media object.")
isPartOf: Optional[List[MediaObjectPartOf]] = Field(
title="Is part of",
description="Link to or citation for a related metadata document that this media object is a part of",
)

@validator('contentSize')
def validate_content_size(cls, v):
Expand Down Expand Up @@ -438,6 +450,15 @@ def validate_content_size(cls, v):

return v

# TODO: not validating the SHA-256 hash for now as the hydroshare content file hash is in md5 format
# @validator('sha256')
# def validate_sha256_string_format(cls, v):
# if v:
# v = v.strip()
# if v and not re.match(r"^[a-fA-F0-9]{64}$", v):
# raise ValueError('invalid SHA-256 format')
# return v


class CoreMetadata(SchemaBaseModel):
context: HttpUrl = Field(
Expand Down
130 changes: 128 additions & 2 deletions api/models/schemas/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1655,11 +1655,56 @@
"required": [
"name"
]
},
"sha256": {
"title": "SHA-256",
"description": "The SHA-256 hash of the media object.",
"type": "string"
},
"isPartOf": {
"title": "Is part of",
"description": "Link to or citation for a related metadata document that this media object is a part of",
"type": "array",
"items": {
"title": "MediaObjectPartOf",
"type": "object",
"properties": {
"@type": {
"title": "@Type",
"description": "Submission type can include various forms of content, such as datasets, software source code, digital documents, etc.",
"default": "CreativeWork",
"type": "string"
},
"name": {
"title": "Name or title",
"description": "Submission's name or title",
"type": "string"
},
"url": {
"title": "URL",
"description": "The URL address to the related metadata document.",
"minLength": 1,
"maxLength": 2083,
"type": "string",
"pattern": "^(http:\\/\\/www\\.|https:\\/\\/www\\.|http:\\/\\/|https:\\/\\/)?[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*\\.[a-z]{2,5}(:[0-9]{1,5})?(\\/.*)?$",
"errorMessage": {
"pattern": "must match format \"url\""
}
},
"description": {
"title": "Description",
"description": "Information about a related metadata document.",
"type": "string"
}
},
"required": [
"name"
]
}
}
},
"required": [
"contentUrl",
"encodingFormat",
"contentSize",
"name"
]
Expand Down Expand Up @@ -2813,6 +2858,42 @@
"name"
]
},
"MediaObjectPartOf": {
"title": "MediaObjectPartOf",
"type": "object",
"properties": {
"@type": {
"title": "@Type",
"description": "Submission type can include various forms of content, such as datasets, software source code, digital documents, etc.",
"default": "CreativeWork",
"type": "string"
},
"name": {
"title": "Name or title",
"description": "Submission's name or title",
"type": "string"
},
"url": {
"title": "URL",
"description": "The URL address to the related metadata document.",
"minLength": 1,
"maxLength": 2083,
"type": "string",
"pattern": "^(http:\\/\\/www\\.|https:\\/\\/www\\.|http:\\/\\/|https:\\/\\/)?[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*\\.[a-z]{2,5}(:[0-9]{1,5})?(\\/.*)?$",
"errorMessage": {
"pattern": "must match format \"url\""
}
},
"description": {
"title": "Description",
"description": "Information about a related metadata document.",
"type": "string"
}
},
"required": [
"name"
]
},
"MediaObject": {
"title": "MediaObject",
"type": "object",
Expand Down Expand Up @@ -3490,11 +3571,56 @@
"required": [
"name"
]
},
"sha256": {
"title": "SHA-256",
"description": "The SHA-256 hash of the media object.",
"type": "string"
},
"isPartOf": {
"title": "Is part of",
"description": "Link to or citation for a related metadata document that this media object is a part of",
"type": "array",
"items": {
"title": "MediaObjectPartOf",
"type": "object",
"properties": {
"@type": {
"title": "@Type",
"description": "Submission type can include various forms of content, such as datasets, software source code, digital documents, etc.",
"default": "CreativeWork",
"type": "string"
},
"name": {
"title": "Name or title",
"description": "Submission's name or title",
"type": "string"
},
"url": {
"title": "URL",
"description": "The URL address to the related metadata document.",
"minLength": 1,
"maxLength": 2083,
"type": "string",
"pattern": "^(http:\\/\\/www\\.|https:\\/\\/www\\.|http:\\/\\/|https:\\/\\/)?[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*\\.[a-z]{2,5}(:[0-9]{1,5})?(\\/.*)?$",
"errorMessage": {
"pattern": "must match format \"url\""
}
},
"description": {
"title": "Description",
"description": "Information about a related metadata document.",
"type": "string"
}
},
"required": [
"name"
]
}
}
},
"required": [
"contentUrl",
"encodingFormat",
"contentSize",
"name"
]
Expand Down
12 changes: 10 additions & 2 deletions tests/data/core_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@
"geo": {
"@type": "GeoShape",
"box": "40.1126 -88.2249 40.1126 -88.2249"
}
},
"additionalProperty": []
},
"hasPart": [
{
Expand All @@ -104,7 +105,14 @@
"contentUrl": "https://www.hydroshare.org/resource/51d1539bf6e94b15ac33f7631228118c/data/contents/USGS_Harvey_gages_TxLaMsAr.csv",
"encodingFormat": "text/csv",
"contentSize": "0.17 MB",
"name": "USGS gage locations within the Harvey-affected areas in Texas"
"name": "USGS gage locations within the Harvey-affected areas in Texas",
"sha256": "830f4b50e78e8a8fb0f7eee7369171dacbcaa43cc2c4deb59cef8e4fd2f641c5",
"additionalProperty": [],
"variableMeasured": null,
"spatialCoverage": null,
"temporalCoverage": null,
"sourceOrganization": null,
"isPartOf": null
}
],
"citation": ["Citation for the dataset"]
Expand Down
58 changes: 58 additions & 0 deletions tests/test_core_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,13 +286,15 @@ async def test_core_schema_associated_media_cardinality(core_data, core_model, m
"contentUrl": "https://www.hydroshare.org/resource/51d1539bf6e94b15ac33f7631228118c/data/contents/USGS_Harvey_gages_TxLaMsAr.csv",
"encodingFormat": "text/csv",
"contentSize": "0.17 GB",
"sha256": "2fba6f2ebac562dac6a57acf0fdc5fdfabc9654b3c910aa6ef69cf4385997e19",
"name": "USGS gage locations within the Harvey-affected areas in Texas",
},
{
"@type": "VideoObject",
"contentUrl": "https://www.hydroshare.org/resource/81cb3f6c0dde4433ae4f43a26a889864/data/contents/HydroClientMovie.mp4",
"encodingFormat": "video/mp4",
"contentSize": "79.2 MB",
"sha256": "2fba6f2ebac562dac6a57acf0fdc5fdfabc9654b3c910aa6ef69cf4385997e20",
"name": "HydroClient Video",
},
]
Expand All @@ -306,6 +308,7 @@ async def test_core_schema_associated_media_cardinality(core_data, core_model, m
"contentUrl": "https://www.hydroshare.org/resource/51d1539bf6e94b15ac33f7631228118c/data/contents/USGS_Harvey_gages_TxLaMsAr.csv",
"encodingFormat": "text/csv",
"contentSize": "0.17 MB",
"sha256": "2fba6f2ebac562dac6a57acf0fdc5fdfabc9654b3c910aa6ef69cf4385997e19",
"name": "USGS gage locations within the Harvey-affected areas in Texas",
}
]
Expand All @@ -326,11 +329,17 @@ async def test_core_schema_associated_media_cardinality(core_data, core_model, m
assert core_model_instance.associatedMedia[1].contentSize == associated_media[1]["contentSize"]
assert core_model_instance.associatedMedia[0].encodingFormat == associated_media[0]["encodingFormat"]
assert core_model_instance.associatedMedia[1].encodingFormat == associated_media[1]["encodingFormat"]
assert core_model_instance.associatedMedia[0].contentUrl == associated_media[0]["contentUrl"]
assert core_model_instance.associatedMedia[1].contentUrl == associated_media[1]["contentUrl"]
assert core_model_instance.associatedMedia[0].sha256 == associated_media[0]["sha256"]
assert core_model_instance.associatedMedia[1].sha256 == associated_media[1]["sha256"]
elif multiple_media is not None:
assert core_model_instance.associatedMedia[0].type == associated_media[0]["@type"]
assert core_model_instance.associatedMedia[0].name == associated_media[0]["name"]
assert core_model_instance.associatedMedia[0].contentSize == associated_media[0]["contentSize"]
assert core_model_instance.associatedMedia[0].encodingFormat == associated_media[0]["encodingFormat"]
assert core_model_instance.associatedMedia[0].contentUrl == associated_media[0]["contentUrl"]
assert core_model_instance.associatedMedia[0].sha256 == associated_media[0]["sha256"]


@pytest.mark.parametrize(
Expand Down Expand Up @@ -369,6 +378,7 @@ async def test_core_schema_associated_media_content_size(
"contentUrl": "https://www.hydroshare.org/resource/51d1539bf6e94b15ac33f7631228118c/data/contents/USGS_Harvey_gages_TxLaMsAr.csv",
"encodingFormat": "text/csv",
"contentSize": content_size_format,
"sha256": "2fba6f2ebac562dac6a57acf0fdc5fdfabc9654b3c910aa6ef69cf4385997e19",
"name": "USGS gage locations within the Harvey-affected areas in Texas",
}
]
Expand Down Expand Up @@ -397,6 +407,7 @@ async def test_core_schema_associated_media_encoding_format_optional(
"@type": "MediaObject",
"contentUrl": "https://www.hydroshare.org/resource/51d1539bf6e94b15ac33f7631228118c/data/contents/USGS_Harvey_gages_TxLaMsAr.csv",
"contentSize": "100.17 KB",
"sha256": "2fba6f2ebac562dac6a57acf0fdc5fdfabc9654b3c910aa6ef69cf4385997e19",
"name": "USGS gage locations within the Harvey-affected areas in Texas",
}
]
Expand All @@ -406,6 +417,53 @@ async def test_core_schema_associated_media_encoding_format_optional(
assert core_model_instance.associatedMedia[0].encodingFormat is None


@pytest.mark.parametrize("set_is_part_of", [True, False])
@pytest.mark.asyncio
async def test_core_schema_associated_media_is_part_of_optional(
    core_data, core_model, set_is_part_of
):
    """Test that a core metadata pydantic model can be created from core metadata json.

    Purpose of the test is to validate the core metadata schema as defined by the pydantic model, where we are
    testing that the isPartOf attribute of the associatedMedia property is optional.
    Note: This test does not add a record to the database.
    """

    # URL of the related metadata document; used both to build the input and to check the parsed model
    metadata_doc_url = (
        "https://www.hydroshare.org/resource/51d1539bf6e94b15ac33f7631228118c/data/contents/logan.nc.json"
    )

    core_data["associatedMedia"] = [
        {
            "@type": "MediaObject",
            "contentUrl": "https://www.hydroshare.org/resource/51d1539bf6e94b15ac33f7631228118c/data/contents/logan.nc",
            "contentSize": "100.17 KB",
            "encodingFormat": "application/x-netcdf",
            "sha256": "2fba6f2ebac562dac6a57acf0fdc5fdfabc9654b3c910aa6ef69cf4385997e19",
            "name": "logan.nc",
        }
    ]

    if set_is_part_of:
        # only the parametrized "True" case supplies isPartOf; the "False" case leaves the key out entirely
        core_data["associatedMedia"][0]["isPartOf"] = [
            {
                "@type": "CreativeWork",
                "name": "logan.nc.json",
                "url": metadata_doc_url,
            }
        ]

    # validate the data model
    core_model_instance = await utils.validate_data_model(core_data, core_model)
    media_object = core_model_instance.associatedMedia[0]

    if set_is_part_of:
        assert len(media_object.isPartOf) == 1
        part_of = media_object.isPartOf[0]
        assert part_of.type == "CreativeWork"
        assert part_of.name == "logan.nc.json"
        assert part_of.url == metadata_doc_url
    else:
        # an omitted isPartOf must come back as None rather than fail validation
        assert media_object.isPartOf is None


@pytest.mark.parametrize("set_additional_property", [True, False])
@pytest.mark.asyncio
async def test_core_schema_associated_media_additional_property(
Expand Down
3 changes: 3 additions & 0 deletions tests/test_hydroshare_meta_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,18 @@ async def test_hydroshare_resource_meta_adapter(hydroshare_resource_metadata, co
assert media.contentUrl == f"{media_base_url}/model-program/V.dat"
assert media.encodingFormat == "None"
assert media.contentSize == "124.144 KB"
assert media.sha256 == "a0b00d911d09e69bdbee0033e40414f9"
elif media.name == "Qsi.nc":
assert media.contentUrl == f"{media_base_url}/model-program/Qsi.nc"
assert media.encodingFormat == "application/x-netcdf"
assert media.contentSize == "20.144 KB"
assert media.sha256 == "93b546c41fca467496900d0f2415c1de"
else:
assert media.name == "README.md"
assert media.contentUrl == f"{media_base_url}/README.md"
assert media.encodingFormat == "text/markdown"
assert media.contentSize == "4.422 KB"
assert media.sha256 == "7d460cb12903a965d144cddcb2b62eac"


@pytest.mark.asyncio
Expand Down

0 comments on commit 2952217

Please sign in to comment.