-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
estuary-cdk: extend
`ResourceConfig`
to allow a cron expression for …
…re-initializing a resource's state on a schedule. For a couple of capture connectors, bindings need to be backfilled on some schedule. So far, we've been manually backfilling these bindings every day/week, and it would be nice if a resource-config-level setting existed to backfill them automatically. This commit extends the existing `ResourceConfig` with the `ResourceConfigWithSchedule` class, allowing resources to specify a schedule for their state to be re-initialized (i.e. backfill the binding). This schedule must either be an empty string or a valid cron expression accepted by `pycron`. In order to know when a binding's state was last initialized, the `last_initialized` property has been added to all bindings' states. Since the resource config is the same for all bindings in a connector, a connector that needs to use `ResourceConfigWithSchedule` for at least one binding must use it for all bindings. `pycron` was chosen as the cron package for a few reasons: - The typically clear choice for a Python cron package, `croniter`, may be unpublished from PyPI in the near future due to the maintainer's concerns around EU CRA laws & liabilities. It is not clear that a different maintainer will take over & keep `croniter` available, and beyond forking & maintaining our own `croniter` repo, I don't see a long-term solution to using that package in our code. - `pycron` has the minimal functionality we need & appears to be somewhat actively maintained & used by others. If there are bindings with cron schedules, we spawn a coroutine to stop the connector at the soonest future scheduled re-initialization time to adhere to the bindings' schedules relatively closely.
- Loading branch information
Showing
4 changed files
with
151 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from datetime import datetime, UTC, timedelta | ||
|
||
import pycron | ||
|
||
CONNECTOR_RESTART_INTERVAL = timedelta(hours=24) | ||
SMALLEST_CRON_INCREMENT = timedelta(minutes=1) | ||
|
||
|
||
# next_fire returns the earliest datetime between start and end (exclusive) that matches the cron expression. | ||
def next_fire( | ||
cron_expression: str, | ||
start: datetime, | ||
end: datetime = datetime.now(tz=UTC) + CONNECTOR_RESTART_INTERVAL | ||
) -> datetime | None: | ||
if not cron_expression: | ||
return None | ||
|
||
dt = start.replace(second=0, microsecond=0) | ||
|
||
while dt < end: | ||
dt += SMALLEST_CRON_INCREMENT | ||
if pycron.is_now(cron_expression, dt): | ||
return dt | ||
|
||
return None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import pytest | ||
import re | ||
from estuary_cdk.capture.common import CRON_REGEX | ||
|
||
# Compiled once at import time and shared by the tests in this module.
pattern = re.compile(CRON_REGEX)


@pytest.mark.parametrize(
    "cron",
    [
        "",  # Empty input
        "* * * * *",  # Wildcards for all positions.
        "*/5 * * * *",  # Steps
        "0 0 1 1 0",  # Minimum values
        "59 23 31 12 6",  # Maximum values
        "0,30,1 0,12 1,4,5,23 3,4,6 1,4",  # Lists
        "0-59 0-23 1-31 1-12 0-6",  # Ranges
    ],
)
def test_valid_cron(cron):
    # Each of these expressions is expected to be accepted by CRON_REGEX.
    matched = pattern.match(cron)
    assert matched is not None
|
||
@pytest.mark.parametrize(
    "cron",
    [
        # Number of arguments
        "*",
        "* *",
        "* * * *",
        "* * * * * *",
        "abc123 * * * *",  # Invalid characters
        "-1 * * * *",  # Negative numbers
        # Beyond min/max
        "60 * * * *",
        "* 24 * * *",
        "* * 0 * *",
        "* * 32 * *",
        "* * * 0 *",
        "* * * 13 *",
        "* * * * 7",
        # Invalid list syntax
        ",0,1 * * * *",
        "0,1, * * * *",
        # Invalid range syntax
        "0-1- * * * *",
        "0- * * * *",
    ],
)
def test_invalid_cron(cron):
    # Each of these expressions is expected to be rejected by CRON_REGEX.
    matched = pattern.match(cron)
    assert matched is None