Skip to content

Commit

Permalink
Correctly record the time when a dataset is created and update the la…
Browse files Browse the repository at this point in the history
…st_used time (#945)

* Correctly record the time when a dataset is created. Also update the last_used time whenever
the dataset is used in a transform.

The create time had incorrectly been set to 0 and the last_used time set to the current date/time.
We now correctly set the last_updated time to the current time along with last_used. Then any time the
dataset is referenced, we update last_used.

* Make initial dataset last used and last updated consistent for new records
  • Loading branch information
BenGalewsky authored Jan 22, 2025
1 parent a769248 commit ca3d9ec
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 8 deletions.
14 changes: 10 additions & 4 deletions servicex_app/servicex_app/dataset_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,11 @@ def from_did(cls, did: DIDParser, logger: Logger, extras: dict[str, str] = None,
db: SQLAlchemy = None):
dataset = Dataset.find_by_name(did.full_did)
if not dataset:
dataset_timestamp = datetime.now(tz=timezone.utc)
dataset = Dataset(
name=did.full_did,
last_used=datetime.now(tz=timezone.utc),
last_updated=datetime.fromtimestamp(0),
last_used=dataset_timestamp,
last_updated=dataset_timestamp,
lookup_status=DatasetStatus.created,
did_finder=did.scheme
)
Expand All @@ -64,6 +65,8 @@ def from_did(cls, did: DIDParser, logger: Logger, extras: dict[str, str] = None,
else:
logger.info(f"Found existing dataset: {dataset.name}, id is {dataset.id}",
extra=extras)
dataset.last_used = datetime.now(tz=timezone.utc)
dataset.save_to_db()

return cls(dataset, logger, db)

Expand All @@ -79,10 +82,11 @@ def from_file_list(cls, file_list: List[str], logger: Logger,
dataset = Dataset.find_by_name(name)

if not dataset:
dataset_timestamp = datetime.now(tz=timezone.utc)
dataset = Dataset(
name=name,
last_used=datetime.now(tz=timezone.utc),
last_updated=datetime.fromtimestamp(0),
last_used=dataset_timestamp,
last_updated=dataset_timestamp,
lookup_status=DatasetStatus.complete,
did_finder='user',
files=[
Expand All @@ -101,6 +105,8 @@ def from_file_list(cls, file_list: List[str], logger: Logger,
else:
logger.info(f"Found existing dataset for file list. Dataset Id is {dataset.id}",
extra=extras)
dataset.last_used = datetime.now(tz=timezone.utc)
dataset.save_to_db()

return cls(dataset, logger, db)

Expand Down
10 changes: 6 additions & 4 deletions servicex_app/servicex_app_test/test_dataset_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,15 @@ def test_from_existing_did(self, client):
did = "rucio://my-did?files=1"
with client.application.app_context():
d = Dataset(name=did, did_finder="rucio", lookup_status=DatasetStatus.looking,
last_used=datetime.now(tz=timezone.utc),
last_updated=datetime.fromtimestamp(0))
last_used=datetime.fromtimestamp(0),
last_updated=datetime.now(tz=timezone.utc))
d.save_to_db()
dm = DatasetManager.from_did(DIDParser(did), logger=client.application.logger, db=db)
assert dm.dataset.name == did
assert dm.dataset.did_finder == "rucio"
assert dm.dataset.lookup_status == DatasetStatus.looking
assert dm.dataset.id == d.id
assert dm.dataset.last_used > datetime.fromtimestamp(0)

def test_from_new_file_list(self, client):
file_list = ["root://eospublic.cern.ch/1.root", "root://eospublic.cern.ch/2.root"]
Expand All @@ -127,8 +128,8 @@ def test_from_existing_file_list(self, client):
with client.application.app_context():
d = Dataset(name=DatasetManager.file_list_hash(file_list),
did_finder="user", lookup_status=DatasetStatus.created,
last_used=datetime.now(tz=timezone.utc),
last_updated=datetime.fromtimestamp(0),
last_used=datetime.fromtimestamp(0),
last_updated=datetime.now(tz=timezone.utc),
files=[
DatasetFile(
paths=file,
Expand All @@ -144,6 +145,7 @@ def test_from_existing_file_list(self, client):
assert dm.dataset.did_finder == "user"
assert dm.dataset.lookup_status == DatasetStatus.created
assert dm.dataset.id == d.id
assert dm.dataset.last_used > datetime.fromtimestamp(0)

def test_from_dataset_id(self, client):
file_list = ["root://eospublic.cern.ch/1.root", "root://eospublic.cern.ch/2.root"]
Expand Down

0 comments on commit ca3d9ec

Please sign in to comment.