From 707a6c66fb6dea5c6b9be0f0c0c8281e1d1b2154 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Wed, 5 Feb 2025 19:06:57 +0200 Subject: [PATCH 01/26] Add API Budget --- .../declarative_component_schema.yaml | 166 ++++++++++++++++++ .../models/declarative_component_schema.py | 130 ++++++++++++++ .../parsers/model_to_component_factory.py | 130 +++++++++++++- .../declarative/requesters/http_requester.py | 3 + 4 files changed, 423 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index d51d4c922..ea044f816 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1365,6 +1365,168 @@ definitions: $parameters: type: object additional_properties: true + APIBudget: + title: API Budget + description: Component that defines how many requests can be made to the API in a given time frame. + type: object + required: + - type + properties: + type: + type: string + enum: [APIBudget] + policies: + title: Policies + description: List of policies that define the rate limits for different types of requests. + type: array + items: + anyOf: + - "$ref": "#/definitions/FixedWindowCallRatePolicy" + - "$ref": "#/definitions/MovingWindowCallRatePolicy" + - "$ref": "#/definitions/UnlimitedCallRatePolicy" + ratelimit_reset_header: + title: Rate Limit Reset Header + description: The name of the header that contains the timestamp for when the rate limit will reset. + type: string + default: "ratelimit-reset" + ratelimit_remaining_header: + title: Rate Limit Remaining Header + description: The name of the header that contains the number of remaining requests. + type: string + default: "ratelimit-remaining" + status_codes_for_ratelimit_hit: + title: Status Codes for Rate Limit Hit + description: List of HTTP status codes that indicate a rate limit has been hit. + type: array + items: + type: integer + default: [429] + maximum_attempts_to_acquire: + title: Maximum Attempts to Acquire + description: The maximum number of attempts to acquire a call before giving up. + type: integer + default: 100000 + additionalProperties: true + FixedWindowCallRatePolicy: + title: Fixed Window Call Rate Policy + description: A policy that allows a fixed number of calls within a specific time window. + type: object + required: + - type + - next_reset_ts + - period + - call_limit + - matchers + properties: + type: + type: string + enum: [FixedWindowCallRatePolicy] + next_reset_ts: + title: Next Reset Timestamp + description: The timestamp when the rate limit will reset. + type: string + format: date-time + period: + title: Period + description: The time interval for the rate limit window. + type: string + format: duration + call_limit: + title: Call Limit + description: The maximum number of calls allowed within the period. + type: integer + matchers: + title: Matchers + description: List of matchers that define which requests this policy applies to. + type: array + items: + "$ref": "#/definitions/HttpRequestMatcher" + additionalProperties: true + MovingWindowCallRatePolicy: + title: Moving Window Call Rate Policy + description: A policy that allows a fixed number of calls within a moving time window. 
+ type: object + required: + - type + - rates + - matchers + properties: + type: + type: string + enum: [MovingWindowCallRatePolicy] + rates: + title: Rates + description: List of rates that define the call limits for different time intervals. + type: array + items: + "$ref": "#/definitions/Rate" + matchers: + title: Matchers + description: List of matchers that define which requests this policy applies to. + type: array + items: + "$ref": "#/definitions/HttpRequestMatcher" + additionalProperties: true + UnlimitedCallRatePolicy: + title: Unlimited Call Rate Policy + description: A policy that allows unlimited calls for specific requests. + type: object + required: + - type + - matchers + properties: + type: + type: string + enum: [UnlimitedCallRatePolicy] + matchers: + title: Matchers + description: List of matchers that define which requests this policy applies to. + type: array + items: + "$ref": "#/definitions/HttpRequestMatcher" + additionalProperties: true + Rate: + title: Rate + description: Defines a rate limit with a specific number of calls allowed within a time interval. + type: object + required: + - limit + - interval + properties: + limit: + title: Limit + description: The maximum number of calls allowed within the interval. + type: integer + interval: + title: Interval + description: The time interval for the rate limit. + type: string + format: duration + additionalProperties: true + HttpRequestMatcher: + title: HTTP Request Matcher + description: Matches HTTP requests based on method, URL, parameters, and headers. + type: object + properties: + method: + title: Method + description: The HTTP method to match (e.g., GET, POST). + type: string + url: + title: URL + description: The URL to match. + type: string + params: + title: Parameters + description: The query parameters to match. + type: object + additionalProperties: true + headers: + title: Headers + description: The headers to match. + type: object + additionalProperties: true + additionalProperties: true DefaultErrorHandler: title: Default Error Handler description: Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff. @@ -1637,6 +1799,10 @@ definitions: - "$ref": "#/definitions/DefaultErrorHandler" - "$ref": "#/definitions/CustomErrorHandler" - "$ref": "#/definitions/CompositeErrorHandler" + api_budget: + title: API Budget + description: Component that defines how many requests can be made to the API in a given time frame. + "$ref": "#/definitions/APIBudget" http_method: title: HTTP Method description: The HTTP method used to fetch data from the source (can be GET or POST). 
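A minimal sketch of how a manifest could wire these new definitions into a requester, assuming the host, endpoint, and limits are illustrative placeholders rather than values taken from this patch:

    requester:
      type: HttpRequester
      url_base: "https://api.example.com"
      path: "/v1/items"
      http_method: GET
      api_budget:
        type: APIBudget
        policies:
          - type: MovingWindowCallRatePolicy
            rates:
              - limit: 100
                interval: PT1M
            matchers:
              - method: GET
                url: "https://api.example.com/v1/items"
          - type: UnlimitedCallRatePolicy
            matchers:
              - url: "https://api.example.com/v1/health"
        status_codes_for_ratelimit_hit: [429]

Under this sketch, requests matching the first matcher would be limited to 100 calls per sliding one-minute window, requests matching the second matcher would never be throttled, and requests that match no policy are allowed by default (the budget logs that no policy matched and lets the call through).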
diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 6aa1d35a7..bd5a69f6c 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -3,6 +3,7 @@ from __future__ import annotations +from datetime import datetime, timedelta from enum import Enum from typing import Any, Dict, List, Literal, Optional, Union @@ -642,6 +643,36 @@ class OAuthAuthenticator(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") +class Rate(BaseModel): + class Config: + extra = Extra.allow + + limit: int = Field( + ..., + description="The maximum number of calls allowed within the interval.", + title="Limit", + ) + interval: timedelta = Field( + ..., description="The time interval for the rate limit.", title="Interval" + ) + + +class HttpRequestMatcher(BaseModel): + class Config: + extra = Extra.allow + + method: Optional[str] = Field( + None, description="The HTTP method to match (e.g., GET, POST).", title="Method" + ) + url: Optional[str] = Field(None, description="The URL to match.", title="URL") + params: Optional[Dict[str, Any]] = Field( + None, description="The query parameters to match.", title="Parameters" + ) + headers: Optional[Dict[str, Any]] = Field( + None, description="The headers to match.", title="Headers" + ) + + class DpathExtractor(BaseModel): type: Literal["DpathExtractor"] field_path: List[str] = Field( @@ -1578,6 +1609,60 @@ class DatetimeBasedCursor(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") +class FixedWindowCallRatePolicy(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["FixedWindowCallRatePolicy"] + next_reset_ts: datetime = Field( + ..., + description="The timestamp when the rate limit will reset.", + title="Next Reset Timestamp", + ) + period: timedelta = Field( + ..., description="The time interval for the rate limit window.", title="Period" + ) + call_limit: int = Field( + ..., + description="The maximum number of calls allowed within the period.", + title="Call Limit", + ) + matchers: List[HttpRequestMatcher] = Field( + ..., + description="List of matchers that define which requests this policy applies to.", + title="Matchers", + ) + + +class MovingWindowCallRatePolicy(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["MovingWindowCallRatePolicy"] + rates: List[Rate] = Field( + ..., + description="List of rates that define the call limits for different time intervals.", + title="Rates", + ) + matchers: List[HttpRequestMatcher] = Field( + ..., + description="List of matchers that define which requests this policy applies to.", + title="Matchers", + ) + + +class UnlimitedCallRatePolicy(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["UnlimitedCallRatePolicy"] + matchers: List[HttpRequestMatcher] = Field( + ..., + description="List of matchers that define which requests this policy applies to.", + title="Matchers", + ) + + class DefaultErrorHandler(BaseModel): type: Literal["DefaultErrorHandler"] backoff_strategies: Optional[ @@ -1709,6 +1794,46 @@ class CompositeErrorHandler(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") +class APIBudget(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["APIBudget"] + policies: Optional[ + List[ + Union[ + FixedWindowCallRatePolicy, + MovingWindowCallRatePolicy, + 
UnlimitedCallRatePolicy, + ] + ] + ] = Field( + None, + description="List of policies that define the rate limits for different types of requests.", + title="Policies", + ) + ratelimit_reset_header: Optional[str] = Field( + "ratelimit-reset", + description="The name of the header that contains the timestamp for when the rate limit will reset.", + title="Rate Limit Reset Header", + ) + ratelimit_remaining_header: Optional[str] = Field( + "ratelimit-remaining", + description="The name of the header that contains the number of remaining requests.", + title="Rate Limit Remaining Header", + ) + status_codes_for_ratelimit_hit: Optional[List[int]] = Field( + [429], + description="List of HTTP status codes that indicate a rate limit has been hit.", + title="Status Codes for Rate Limit Hit", + ) + maximum_attempts_to_acquire: Optional[int] = Field( + 100000, + description="The maximum number of attempts to acquire a call before giving up.", + title="Maximum Attempts to Acquire", + ) + + class ZipfileDecoder(BaseModel): class Config: extra = Extra.allow @@ -1979,6 +2104,11 @@ class HttpRequester(BaseModel): description="Error handler component that defines how to handle errors.", title="Error Handler", ) + api_budget: Optional[APIBudget] = Field( + None, + description="Component that defines how many requests can be made to the API in a given time frame.", + title="API Budget", + ) http_method: Optional[HttpMethod] = Field( HttpMethod.GET, description="The HTTP method used to fetch data from the source (can be GET or POST).", diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index b8eeca1ec..cec9aff25 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -112,6 +112,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( AddFields as AddFieldsModel, ) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + APIBudget as APIBudgetModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( ApiKeyAuthenticator as ApiKeyAuthenticatorModel, ) @@ -226,6 +229,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( ExponentialBackoffStrategy as ExponentialBackoffStrategyModel, ) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( FlattenFields as FlattenFieldsModel, ) @@ -241,6 +247,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( HttpRequester as HttpRequesterModel, ) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + HttpRequestMatcher as HttpRequestMatcherModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( HttpResponseFilter as HttpResponseFilterModel, ) @@ -295,6 +304,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( MinMaxDatetime as MinMaxDatetimeModel, ) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( NoAuth as NoAuthModel, ) @@ -313,6 +325,9 @@ from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import ( ParentStreamConfig as ParentStreamConfigModel, ) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + Rate as RateModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( RecordFilter as RecordFilterModel, ) @@ -356,6 +371,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( TypesMap as TypesMapModel, ) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( WaitTimeFromHeader as WaitTimeFromHeaderModel, @@ -469,6 +487,14 @@ MessageRepository, NoopMessageRepository, ) +from airbyte_cdk.sources.streams.call_rate import ( + FixedWindowCallRatePolicy, + HttpAPIBudget, + HttpRequestMatcher, + MovingWindowCallRatePolicy, + Rate, + UnlimitedCallRatePolicy, +) from airbyte_cdk.sources.streams.concurrent.clamping import ( ClampingEndProvider, ClampingStrategy, @@ -607,6 +633,12 @@ def _init_mappings(self) -> None: StreamConfigModel: self.create_stream_config, ComponentMappingDefinitionModel: self.create_components_mapping_definition, ZipfileDecoderModel: self.create_zipfile_decoder, + APIBudgetModel: self.create_api_budget, + FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy, + MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy, + UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy, + RateModel: self.create_rate, + HttpRequestMatcherModel: self.create_http_request_matcher, } # Needed for the case where we need to perform a second parse on the fields of a custom component @@ -813,7 +845,8 @@ def create_legacy_to_per_partition_state_migration( return LegacyToPerPartitionStateMigration( partition_router, # type: ignore # was already checked above - declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. + declarative_stream.incremental_sync, + # type: ignore # was already checked. Migration can be applied only to incremental streams. config, declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] ) @@ -1111,7 +1144,8 @@ def create_concurrent_cursor_from_datetime_based_cursor( clamping_strategy = DayClampingStrategy() end_date_provider = ClampingEndProvider( DayClampingStrategy(is_ceiling=False), - end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice + end_date_provider, + # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice granularity=cursor_granularity or datetime.timedelta(seconds=1), ) case "WEEK": @@ -1128,14 +1162,16 @@ def create_concurrent_cursor_from_datetime_based_cursor( clamping_strategy = WeekClampingStrategy(weekday) end_date_provider = ClampingEndProvider( WeekClampingStrategy(weekday, is_ceiling=False), - end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice + end_date_provider, + # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice granularity=cursor_granularity or datetime.timedelta(days=1), ) case "MONTH": clamping_strategy = MonthClampingStrategy() end_date_provider = ClampingEndProvider( MonthClampingStrategy(is_ceiling=False), - end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice + end_date_provider, + # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice granularity=cursor_granularity or datetime.timedelta(days=1), ) case _: @@ -1152,8 +1188,10 @@ def create_concurrent_cursor_from_datetime_based_cursor( connector_state_converter=connector_state_converter, cursor_field=cursor_field, slice_boundary_fields=slice_boundary_fields, - start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice - end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice + start=start_date, + # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice + end_provider=end_date_provider, + # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice lookback_window=lookback_window, slice_range=step_length, cursor_granularity=cursor_granularity, @@ -1911,6 +1949,12 @@ def create_http_requester( ) ) + api_budget = ( + self._create_component_from_model(model=model.api_budget, config=config) + if model.api_budget + else None + ) + request_options_provider = InterpolatedRequestOptionsProvider( request_body_data=model.request_body_data, request_body_json=model.request_body_json, @@ -1931,6 +1975,7 @@ def create_http_requester( path=model.path, authenticator=authenticator, error_handler=error_handler, + api_budget=api_budget, http_method=HttpMethod[model.http_method.value], request_options_provider=request_options_provider, config=config, @@ -2919,3 +2964,76 @@ def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: return isinstance(parser.inner_parser, JsonParser) else: return False + + def create_api_budget( + self, model: APIBudgetModel, config: Config, **kwargs: Any + ) -> HttpAPIBudget: + policies = [ + self._create_component_from_model(model=policy, config=config) + for policy in model.policies + ] + + return HttpAPIBudget( + policies=policies, + ratelimit_reset_header=model.ratelimit_reset_header, + ratelimit_remaining_header=model.ratelimit_remaining_header, + status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit, + maximum_attempts_to_acquire=model.maximum_attempts_to_acquire, + ) + + def create_fixed_window_call_rate_policy( + self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any + ) -> FixedWindowCallRatePolicy: + matchers = [ + self._create_component_from_model(model=matcher, config=config) + for matcher in model.matchers + ] + return FixedWindowCallRatePolicy( + next_reset_ts=model.next_reset_ts, + period=parse_duration(model.period), + 
call_limit=model.call_limit, + matchers=matchers, + ) + + def create_moving_window_call_rate_policy( + self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any + ) -> MovingWindowCallRatePolicy: + rates = [ + self._create_component_from_model(model=rate, config=config) for rate in model.rates + ] + matchers = [ + self._create_component_from_model(model=matcher, config=config) + for matcher in model.matchers + ] + return MovingWindowCallRatePolicy( + rates=rates, + matchers=matchers, + ) + + def create_unlimited_call_rate_policy( + self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any + ) -> UnlimitedCallRatePolicy: + matchers = [ + self._create_component_from_model(model=matcher, config=config) + for matcher in model.matchers + ] + + return UnlimitedCallRatePolicy( + matchers=matchers, + ) + + def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: + return Rate( + limit=model.limit, + interval=model.interval, + ) + + def create_http_request_matcher( + self, model: HttpRequestMatcherModel, config: Config, **kwargs: Any + ) -> HttpRequestMatcher: + return HttpRequestMatcher( + method=model.method, + url=model.url, + params=model.params, + headers=model.headers, + ) diff --git a/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte_cdk/sources/declarative/requesters/http_requester.py index 35d4b0f11..96b6a4365 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -22,6 +22,7 @@ ) from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository +from airbyte_cdk.sources.streams.call_rate import APIBudget from airbyte_cdk.sources.streams.http import HttpClient from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @@ -55,6 +56,7 @@ class HttpRequester(Requester): http_method: Union[str, HttpMethod] = HttpMethod.GET request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None error_handler: Optional[ErrorHandler] = None + api_budget: Optional[APIBudget] = None disable_retries: bool = False message_repository: MessageRepository = NoopMessageRepository() use_cache: bool = False @@ -91,6 +93,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: name=self.name, logger=self.logger, error_handler=self.error_handler, + api_budget=self.api_budget, authenticator=self._authenticator, use_cache=self.use_cache, backoff_strategy=backoff_strategies, From b6bcdd7aa93e04fb3a81824c99d7b5821dbeffc7 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Thu, 6 Feb 2025 20:40:54 +0200 Subject: [PATCH 02/26] Refactor to move api_budget to root level --- .../declarative_component_schema.yaml | 67 ++++++-- .../manifest_declarative_source.py | 4 + .../models/declarative_component_schema.py | 69 ++++++-- .../parsers/model_to_component_factory.py | 81 ++++++--- airbyte_cdk/sources/streams/call_rate.py | 155 ++++++++++-------- 5 files changed, 251 insertions(+), 125 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index ea044f816..aa4e2b4df 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -40,6 +40,12 @@ properties: "$ref": 
"#/definitions/Spec" concurrency_level: "$ref": "#/definitions/ConcurrencyLevel" + api_budget: + title: API Budget + description: Defines how many requests can be made to the API in a given time frame. This field accepts either a generic APIBudget or an HTTP-specific configuration (HTTPAPIBudget) to be applied across all streams. + anyOf: + - "$ref": "#/definitions/APIBudget" + - "$ref": "#/definitions/HTTPAPIBudget" metadata: type: object description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata. @@ -794,7 +800,7 @@ definitions: description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month) type: object required: - - target + - target properties: target: title: Target @@ -1367,17 +1373,49 @@ definitions: additional_properties: true APIBudget: title: API Budget - description: Component that defines how many requests can be made to the API in a given time frame. + description: > + A generic API budget configuration that defines the policies (rate limiting rules) + and the maximum number of attempts to acquire a call credit. This budget does not automatically + update itself based on HTTP response headers. type: object required: - type + - policies properties: type: type: string enum: [APIBudget] policies: title: Policies - description: List of policies that define the rate limits for different types of requests. + description: List of call rate policies that define how many calls are allowed. + type: array + items: + anyOf: + - "$ref": "#/definitions/FixedWindowCallRatePolicy" + - "$ref": "#/definitions/MovingWindowCallRatePolicy" + - "$ref": "#/definitions/UnlimitedCallRatePolicy" + maximum_attempts_to_acquire: + title: Maximum Attempts to Acquire + description: The maximum number of attempts to acquire a call before giving up. + type: integer + default: 100000 + additionalProperties: true + HTTPAPIBudget: + title: HTTP API Budget + description: > + An HTTP-specific API budget that extends APIBudget by updating rate limiting information based + on HTTP response headers. It extracts available calls and the next reset timestamp from the HTTP responses. + type: object + required: + - type + - policies + properties: + type: + type: string + enum: [HTTPAPIBudget] + policies: + title: Policies + description: List of call rate policies that define how many calls are allowed. type: array items: anyOf: @@ -1386,12 +1424,12 @@ definitions: - "$ref": "#/definitions/UnlimitedCallRatePolicy" ratelimit_reset_header: title: Rate Limit Reset Header - description: The name of the header that contains the timestamp for when the rate limit will reset. + description: The HTTP response header name that indicates when the rate limit resets. type: string default: "ratelimit-reset" ratelimit_remaining_header: title: Rate Limit Remaining Header - description: The name of the header that contains the number of remaining requests. + description: The HTTP response header name that indicates the number of remaining allowed calls. type: string default: "ratelimit-remaining" status_codes_for_ratelimit_hit: @@ -1505,16 +1543,23 @@ definitions: additionalProperties: true HttpRequestMatcher: title: HTTP Request Matcher - description: Matches HTTP requests based on method, URL, parameters, and headers. + description: > + Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers. 
+ Use `url_base` to specify the scheme and host (without trailing slash) and + `url_path_pattern` to apply a regex to the request path. type: object properties: method: title: Method description: The HTTP method to match (e.g., GET, POST). type: string - url: - title: URL - description: The URL to match. + url_base: + title: URL Base + description: The base URL (scheme and host, e.g. "https://api.example.com") to match. + type: string + url_path_pattern: + title: URL Path Pattern + description: A regular expression pattern to match the URL path. type: string params: title: Parameters @@ -1799,10 +1844,6 @@ definitions: - "$ref": "#/definitions/DefaultErrorHandler" - "$ref": "#/definitions/CustomErrorHandler" - "$ref": "#/definitions/CompositeErrorHandler" - api_budget: - title: API Budget - description: Component that defines how many requests can be made to the API in a given time frame. - "$ref": "#/definitions/APIBudget" http_method: title: HTTP Method description: The HTTP method used to fetch data from the source (can be GET or POST). diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index efc779464..d3afb1396 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -137,6 +137,10 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: self._source_config, config ) + api_budget_model = self._source_config.get("api_budget") + if api_budget_model: + self._constructor.set_api_budget(api_budget_model, config) + source_streams = [ self._constructor.create_component( DeclarativeStreamModel, diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index bd5a69f6c..c00e46831 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -664,7 +664,16 @@ class Config: method: Optional[str] = Field( None, description="The HTTP method to match (e.g., GET, POST).", title="Method" ) - url: Optional[str] = Field(None, description="The URL to match.", title="URL") + url_base: Optional[str] = Field( + None, + description='The base URL (scheme and host, e.g. 
"https://api.example.com") to match.', + title="URL Base", + ) + url_path_pattern: Optional[str] = Field( + None, + description="A regular expression pattern to match the URL path.", + title="URL Path Pattern", + ) params: Optional[Dict[str, Any]] = Field( None, description="The query parameters to match.", title="Parameters" ) @@ -1799,27 +1808,48 @@ class Config: extra = Extra.allow type: Literal["APIBudget"] - policies: Optional[ - List[ - Union[ - FixedWindowCallRatePolicy, - MovingWindowCallRatePolicy, - UnlimitedCallRatePolicy, - ] + policies: List[ + Union[ + FixedWindowCallRatePolicy, + MovingWindowCallRatePolicy, + UnlimitedCallRatePolicy, ] ] = Field( - None, - description="List of policies that define the rate limits for different types of requests.", + ..., + description="List of call rate policies that define how many calls are allowed.", + title="Policies", + ) + maximum_attempts_to_acquire: Optional[int] = Field( + 100000, + description="The maximum number of attempts to acquire a call before giving up.", + title="Maximum Attempts to Acquire", + ) + + +class HTTPAPIBudget(BaseModel): + class Config: + extra = Extra.allow + + type: Literal["HTTPAPIBudget"] + policies: List[ + Union[ + FixedWindowCallRatePolicy, + MovingWindowCallRatePolicy, + UnlimitedCallRatePolicy, + ] + ] = Field( + ..., + description="List of call rate policies that define how many calls are allowed.", title="Policies", ) ratelimit_reset_header: Optional[str] = Field( "ratelimit-reset", - description="The name of the header that contains the timestamp for when the rate limit will reset.", + description="The HTTP response header name that indicates when the rate limit resets.", title="Rate Limit Reset Header", ) ratelimit_remaining_header: Optional[str] = Field( "ratelimit-remaining", - description="The name of the header that contains the number of remaining requests.", + description="The HTTP response header name that indicates the number of remaining allowed calls.", title="Rate Limit Remaining Header", ) status_codes_for_ratelimit_hit: Optional[List[int]] = Field( @@ -1867,6 +1897,11 @@ class Config: definitions: Optional[Dict[str, Any]] = None spec: Optional[Spec] = None concurrency_level: Optional[ConcurrencyLevel] = None + api_budget: Optional[Union[APIBudget, HTTPAPIBudget]] = Field( + None, + description="Defines how many requests can be made to the API in a given time frame. This field accepts either a generic APIBudget or an HTTP-specific configuration (HTTPAPIBudget) to be applied across all streams.", + title="API Budget", + ) metadata: Optional[Dict[str, Any]] = Field( None, description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", @@ -1893,6 +1928,11 @@ class Config: definitions: Optional[Dict[str, Any]] = None spec: Optional[Spec] = None concurrency_level: Optional[ConcurrencyLevel] = None + api_budget: Optional[Union[APIBudget, HTTPAPIBudget]] = Field( + None, + description="Defines how many requests can be made to the API in a given time frame. This field accepts either a generic APIBudget or an HTTP-specific configuration (HTTPAPIBudget) to be applied across all streams.", + title="API Budget", + ) metadata: Optional[Dict[str, Any]] = Field( None, description="For internal Airbyte use only - DO NOT modify manually. 
Used by consumers of declarative manifests for storing related metadata.", @@ -2104,11 +2144,6 @@ class HttpRequester(BaseModel): description="Error handler component that defines how to handle errors.", title="Error Handler", ) - api_budget: Optional[APIBudget] = Field( - None, - description="Component that defines how many requests can be made to the API in a given time frame.", - title="API Budget", - ) http_method: Optional[HttpMethod] = Field( HttpMethod.GET, description="The HTTP method used to fetch data from the source (can be GET or POST).", diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index cec9aff25..87048a005 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -241,6 +241,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( GzipParser as GzipParserModel, ) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + HTTPAPIBudget as HTTPAPIBudgetModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( HttpComponentsResolver as HttpComponentsResolverModel, ) @@ -488,6 +491,7 @@ NoopMessageRepository, ) from airbyte_cdk.sources.streams.call_rate import ( + APIBudget, FixedWindowCallRatePolicy, HttpAPIBudget, HttpRequestMatcher, @@ -546,6 +550,7 @@ def __init__( self._evaluate_log_level(emit_connector_builder_messages) ) self._connector_state_manager = connector_state_manager or ConnectorStateManager() + self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None def _init_mappings(self) -> None: self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = { @@ -634,6 +639,7 @@ def _init_mappings(self) -> None: ComponentMappingDefinitionModel: self.create_components_mapping_definition, ZipfileDecoderModel: self.create_zipfile_decoder, APIBudgetModel: self.create_api_budget, + HTTPAPIBudgetModel: self.create_http_api_budget, FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy, MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy, UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy, @@ -845,8 +851,7 @@ def create_legacy_to_per_partition_state_migration( return LegacyToPerPartitionStateMigration( partition_router, # type: ignore # was already checked above - declarative_stream.incremental_sync, - # type: ignore # was already checked. Migration can be applied only to incremental streams. + declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. config, declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] ) @@ -1144,8 +1149,7 @@ def create_concurrent_cursor_from_datetime_based_cursor( clamping_strategy = DayClampingStrategy() end_date_provider = ClampingEndProvider( DayClampingStrategy(is_ceiling=False), - end_date_provider, - # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice + end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice granularity=cursor_granularity or datetime.timedelta(seconds=1), ) case "WEEK": @@ -1162,16 +1166,14 @@ def create_concurrent_cursor_from_datetime_based_cursor( clamping_strategy = WeekClampingStrategy(weekday) end_date_provider = ClampingEndProvider( WeekClampingStrategy(weekday, is_ceiling=False), - end_date_provider, - # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice + end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice granularity=cursor_granularity or datetime.timedelta(days=1), ) case "MONTH": clamping_strategy = MonthClampingStrategy() end_date_provider = ClampingEndProvider( MonthClampingStrategy(is_ceiling=False), - end_date_provider, - # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice + end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice granularity=cursor_granularity or datetime.timedelta(days=1), ) case _: @@ -1188,10 +1190,8 @@ def create_concurrent_cursor_from_datetime_based_cursor( connector_state_converter=connector_state_converter, cursor_field=cursor_field, slice_boundary_fields=slice_boundary_fields, - start=start_date, - # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice - end_provider=end_date_provider, - # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice + start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice + end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice lookback_window=lookback_window, slice_range=step_length, cursor_granularity=cursor_granularity, @@ -1949,11 +1949,7 @@ def create_http_requester( ) ) - api_budget = ( - self._create_component_from_model(model=model.api_budget, config=config) - if model.api_budget - else None - ) + api_budget = self._api_budget request_options_provider = InterpolatedRequestOptionsProvider( request_body_data=model.request_body_data, @@ -2965,8 +2961,21 @@ def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: else: return False - def create_api_budget( - self, model: APIBudgetModel, config: Config, **kwargs: Any + def create_api_budget(self, model: APIBudgetModel, config: Config, **kwargs: Any) -> APIBudget: + policies = [ + self._create_component_from_model(model=policy, config=config) + for policy in model.policies + ] + + return APIBudget( + policies=policies, + maximum_attempts_to_acquire=model.maximum_attempts_to_acquire + if model.maximum_attempts_to_acquire + else 100000, + ) + + def create_http_api_budget( + self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any ) -> HttpAPIBudget: policies = [ self._create_component_from_model(model=policy, config=config) @@ -2975,10 +2984,18 @@ def create_api_budget( return HttpAPIBudget( policies=policies, - ratelimit_reset_header=model.ratelimit_reset_header, - ratelimit_remaining_header=model.ratelimit_remaining_header, - status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit, - maximum_attempts_to_acquire=model.maximum_attempts_to_acquire, + maximum_attempts_to_acquire=model.maximum_attempts_to_acquire + if model.maximum_attempts_to_acquire + else 100000, + ratelimit_reset_header=model.ratelimit_reset_header + if model.ratelimit_reset_header + else "ratelimit-reset", + ratelimit_remaining_header=model.ratelimit_remaining_header + if model.ratelimit_remaining_header + else "ratelimit-remaining", + status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit + if model.status_codes_for_ratelimit_hit + else (429,), ) def create_fixed_window_call_rate_policy( @@ -3033,7 +3050,23 @@ def create_http_request_matcher( ) -> HttpRequestMatcher: return HttpRequestMatcher( method=model.method, - url=model.url, + url_base=model.url_base, + url_path_pattern=model.url_path_pattern, params=model.params, headers=model.headers, ) + + def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: + model_str = component_definition.get("type") + if model_str == "APIBudget": + # Annotate model_type as a type that is a subclass of BaseModel + model_type: Union[Type[APIBudgetModel], Type[HTTPAPIBudgetModel]] = APIBudgetModel + elif model_str == "HTTPAPIBudget": + model_type = HTTPAPIBudgetModel + else: + raise ValueError(f"Unknown API Budget type: {model_str}") + + # create_component expects a type[BaseModel] and returns an instance of that model. 
+ self._api_budget = self.create_component( + model_type=model_type, component_definition=component_definition, config=config + ) diff --git a/airbyte_cdk/sources/streams/call_rate.py b/airbyte_cdk/sources/streams/call_rate.py index 81ebac78e..d25fb9c2b 100644 --- a/airbyte_cdk/sources/streams/call_rate.py +++ b/airbyte_cdk/sources/streams/call_rate.py @@ -6,10 +6,12 @@ import dataclasses import datetime import logging +import re import time +from dataclasses import InitVar, dataclass, field from datetime import timedelta from threading import RLock -from typing import TYPE_CHECKING, Any, Mapping, Optional +from typing import TYPE_CHECKING, Any, Mapping, Optional, Union from urllib import parse import requests @@ -98,43 +100,55 @@ def __call__(self, request: Any) -> bool: class HttpRequestMatcher(RequestMatcher): - """Simple implementation of RequestMatcher for http requests case""" + """ + Extended RequestMatcher for HTTP requests that supports matching on: + - HTTP method (case-insensitive) + - URL base (scheme + netloc) optionally + - URL path pattern (a regex applied to the path portion of the URL) + - Query parameters (must be present) + - Headers (header names compared case-insensitively) + """ def __init__( self, method: Optional[str] = None, - url: Optional[str] = None, + url_base: Optional[str] = None, + url_path_pattern: Optional[str] = None, params: Optional[Mapping[str, Any]] = None, headers: Optional[Mapping[str, Any]] = None, ): - """Constructor - - :param method: - :param url: - :param params: - :param headers: """ - self._method = method - self._url = url + :param method: HTTP method (e.g. "GET", "POST"); compared case-insensitively. + :param url_base: Base URL (scheme://host) that must match. + :param url_path_pattern: A regex pattern that will be applied to the path portion of the URL. + :param params: Dictionary of query parameters that must be present in the request. + :param headers: Dictionary of headers that must be present (header keys are compared case-insensitively). + """ + self._method = method.upper() if method else None + + # Normalize the url_base if provided: remove trailing slash. + self._url_base = url_base.rstrip("/") if url_base else None + + # Compile the URL path pattern if provided. + self._url_path_pattern = re.compile(url_path_pattern) if url_path_pattern else None + + # Normalize query parameters to strings. self._params = {str(k): str(v) for k, v in (params or {}).items()} - self._headers = {str(k): str(v) for k, v in (headers or {}).items()} + + # Normalize header keys to lowercase. + self._headers = {str(k).lower(): str(v) for k, v in (headers or {}).items()} @staticmethod def _match_dict(obj: Mapping[str, Any], pattern: Mapping[str, Any]) -> bool: - """Check that all elements from pattern dict present and have the same values in obj dict - - :param obj: - :param pattern: - :return: - """ + """Check that every key/value in the pattern exists in the object.""" return pattern.items() <= obj.items() def __call__(self, request: Any) -> bool: """ - - :param request: - :return: True if matches the provided request object, False - otherwise + :param request: A requests.Request or requests.PreparedRequest instance. + :return: True if the request matches all provided criteria; False otherwise. """ + # Prepare the request (if needed) and extract the URL details. 
if isinstance(request, requests.Request): prepared_request = request.prepare() elif isinstance(request, requests.PreparedRequest): @@ -142,21 +156,40 @@ def __call__(self, request: Any) -> bool: else: return False - if self._method is not None: - if prepared_request.method != self._method: + # Check HTTP method. + if self._method is not None and prepared_request.method is not None: + if prepared_request.method.upper() != self._method: return False - if self._url is not None and prepared_request.url is not None: - url_without_params = prepared_request.url.split("?")[0] - if url_without_params != self._url: + + # Parse the URL. + parsed_url = parse.urlsplit(prepared_request.url) + # Reconstruct the base: scheme://netloc + request_url_base = f"{str(parsed_url.scheme)}://{str(parsed_url.netloc)}" + # The path (without query parameters) + request_path = str(parsed_url.path).rstrip("/") + + # If a base URL is provided, check that it matches. + if self._url_base is not None: + if request_url_base != self._url_base: return False - if self._params is not None: - parsed_url = parse.urlsplit(prepared_request.url) - params = dict(parse.parse_qsl(str(parsed_url.query))) - if not self._match_dict(params, self._params): + + # If a URL path pattern is provided, ensure the path matches the regex. + if self._url_path_pattern is not None: + if not self._url_path_pattern.search(request_path): return False - if self._headers is not None: - if not self._match_dict(prepared_request.headers, self._headers): + + # Check query parameters. + if self._params: + query_params = dict(parse.parse_qsl(str(parsed_url.query))) + if not self._match_dict(query_params, self._params): return False + + # Check headers (normalize keys to lower-case). + if self._headers: + req_headers = {k.lower(): v for k, v in prepared_request.headers.items()} + if not self._match_dict(req_headers, self._headers): + return False + return True @@ -399,24 +432,17 @@ def update_from_response(self, request: Any, response: Any) -> None: """ +@dataclass class APIBudget(AbstractAPIBudget): - """Default APIBudget implementation""" - - def __init__( - self, policies: list[AbstractCallRatePolicy], maximum_attempts_to_acquire: int = 100000 - ) -> None: - """Constructor - - :param policies: list of policies in this budget - :param maximum_attempts_to_acquire: number of attempts before throwing hit ratelimit exception, we put some big number here - to avoid situations when many threads compete with each other for a few lots over a significant amount of time - """ + """ + Default APIBudget implementation. 
+ """ - self._policies = policies - self._maximum_attempts_to_acquire = maximum_attempts_to_acquire + policies: list[AbstractCallRatePolicy] + maximum_attempts_to_acquire: int = 100000 def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]: - for policy in self._policies: + for policy in self.policies: if policy.matches(request): return policy return None @@ -437,7 +463,7 @@ def acquire_call( policy = self.get_matching_policy(request) if policy: self._do_acquire(request=request, policy=policy, block=block, timeout=timeout) - elif self._policies: + elif self.policies: logger.info("no policies matched with requests, allow call by default") def update_from_response(self, request: Any, response: Any) -> None: @@ -460,7 +486,7 @@ def _do_acquire( """ last_exception = None # sometimes we spend all budget before a second attempt, so we have few more here - for attempt in range(1, self._maximum_attempts_to_acquire): + for attempt in range(1, self.maximum_attempts_to_acquire): try: policy.try_acquire(request, weight=1) return @@ -484,31 +510,18 @@ def _do_acquire( if last_exception: logger.info( - "we used all %s attempts to acquire and failed", self._maximum_attempts_to_acquire + "we used all %s attempts to acquire and failed", self.maximum_attempts_to_acquire ) raise last_exception +@dataclass class HttpAPIBudget(APIBudget): """Implementation of AbstractAPIBudget for HTTP""" - def __init__( - self, - ratelimit_reset_header: str = "ratelimit-reset", - ratelimit_remaining_header: str = "ratelimit-remaining", - status_codes_for_ratelimit_hit: tuple[int] = (429,), - **kwargs: Any, - ): - """Constructor - - :param ratelimit_reset_header: name of the header that has a timestamp of the next reset of call budget - :param ratelimit_remaining_header: name of the header that has the number of calls left - :param status_codes_for_ratelimit_hit: list of HTTP status codes that signal about rate limit being hit - """ - self._ratelimit_reset_header = ratelimit_reset_header - self._ratelimit_remaining_header = ratelimit_remaining_header - self._status_codes_for_ratelimit_hit = status_codes_for_ratelimit_hit - super().__init__(**kwargs) + ratelimit_reset_header: str = "ratelimit-reset" + ratelimit_remaining_header: str = "ratelimit-remaining" + status_codes_for_ratelimit_hit: Union[tuple[int], list[int]] = (429,) def update_from_response(self, request: Any, response: Any) -> None: policy = self.get_matching_policy(request) @@ -523,17 +536,17 @@ def update_from_response(self, request: Any, response: Any) -> None: def get_reset_ts_from_response( self, response: requests.Response ) -> Optional[datetime.datetime]: - if response.headers.get(self._ratelimit_reset_header): + if response.headers.get(self.ratelimit_reset_header): return datetime.datetime.fromtimestamp( - int(response.headers[self._ratelimit_reset_header]) + int(response.headers[self.ratelimit_reset_header]) ) return None def get_calls_left_from_response(self, response: requests.Response) -> Optional[int]: - if response.headers.get(self._ratelimit_remaining_header): - return int(response.headers[self._ratelimit_remaining_header]) + if response.headers.get(self.ratelimit_remaining_header): + return int(response.headers[self.ratelimit_remaining_header]) - if response.status_code in self._status_codes_for_ratelimit_hit: + if response.status_code in self.status_codes_for_ratelimit_hit: return 0 return None From 040ff9e5ec97af3fd7e56bf18fb46a5e70273153 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Thu, 6 Feb 2025 20:46:27 +0200 
Subject: [PATCH 03/26] Format --- .../parsers/model_to_component_factory.py | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 87048a005..0ae7e9572 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -2969,9 +2969,7 @@ def create_api_budget(self, model: APIBudgetModel, config: Config, **kwargs: Any return APIBudget( policies=policies, - maximum_attempts_to_acquire=model.maximum_attempts_to_acquire - if model.maximum_attempts_to_acquire - else 100000, + maximum_attempts_to_acquire=model.maximum_attempts_to_acquire or 100000, ) def create_http_api_budget( @@ -2984,18 +2982,10 @@ def create_http_api_budget( return HttpAPIBudget( policies=policies, - maximum_attempts_to_acquire=model.maximum_attempts_to_acquire - if model.maximum_attempts_to_acquire - else 100000, - ratelimit_reset_header=model.ratelimit_reset_header - if model.ratelimit_reset_header - else "ratelimit-reset", - ratelimit_remaining_header=model.ratelimit_remaining_header - if model.ratelimit_remaining_header - else "ratelimit-remaining", - status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit - if model.status_codes_for_ratelimit_hit - else (429,), + maximum_attempts_to_acquire=model.maximum_attempts_to_acquire or 100000, + ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", + ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", + status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or (429,), ) def create_fixed_window_call_rate_policy( From 15f830ca5be3ad69cc8065a5de43098d0a1ab110 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Fri, 7 Feb 2025 17:43:53 +0200 Subject: [PATCH 04/26] Update for backward compatibility --- .../declarative_component_schema.yaml | 8 +- .../models/declarative_component_schema.py | 8 +- .../parsers/model_to_component_factory.py | 12 +-- airbyte_cdk/sources/streams/call_rate.py | 63 +++++++++++++ unit_tests/sources/streams/test_call_rate.py | 88 +++++++++++++++++++ 5 files changed, 165 insertions(+), 14 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index abcddf514..25c9492fb 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1478,7 +1478,7 @@ definitions: description: List of matchers that define which requests this policy applies to. type: array items: - "$ref": "#/definitions/HttpRequestMatcher" + "$ref": "#/definitions/HttpRequestRegexMatcher" additionalProperties: true MovingWindowCallRatePolicy: title: Moving Window Call Rate Policy @@ -1503,7 +1503,7 @@ definitions: description: List of matchers that define which requests this policy applies to. type: array items: - "$ref": "#/definitions/HttpRequestMatcher" + "$ref": "#/definitions/HttpRequestRegexMatcher" additionalProperties: true UnlimitedCallRatePolicy: title: Unlimited Call Rate Policy @@ -1521,7 +1521,7 @@ definitions: description: List of matchers that define which requests this policy applies to. 
type: array items: - "$ref": "#/definitions/HttpRequestMatcher" + "$ref": "#/definitions/HttpRequestRegexMatcher" additionalProperties: true Rate: title: Rate @@ -1541,7 +1541,7 @@ definitions: type: string format: duration additionalProperties: true - HttpRequestMatcher: + HttpRequestRegexMatcher: title: HTTP Request Matcher description: > Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers. diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 5bd0aa80d..aaff67548 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -657,7 +657,7 @@ class Config: ) -class HttpRequestMatcher(BaseModel): +class HttpRequestRegexMatcher(BaseModel): class Config: extra = Extra.allow @@ -1642,7 +1642,7 @@ class Config: description="The maximum number of calls allowed within the period.", title="Call Limit", ) - matchers: List[HttpRequestMatcher] = Field( + matchers: List[HttpRequestRegexMatcher] = Field( ..., description="List of matchers that define which requests this policy applies to.", title="Matchers", @@ -1659,7 +1659,7 @@ class Config: description="List of rates that define the call limits for different time intervals.", title="Rates", ) - matchers: List[HttpRequestMatcher] = Field( + matchers: List[HttpRequestRegexMatcher] = Field( ..., description="List of matchers that define which requests this policy applies to.", title="Matchers", @@ -1671,7 +1671,7 @@ class Config: extra = Extra.allow type: Literal["UnlimitedCallRatePolicy"] - matchers: List[HttpRequestMatcher] = Field( + matchers: List[HttpRequestRegexMatcher] = Field( ..., description="List of matchers that define which requests this policy applies to.", title="Matchers", diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 6f3f39604..9bd775a4a 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -251,7 +251,7 @@ HttpRequester as HttpRequesterModel, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - HttpRequestMatcher as HttpRequestMatcherModel, + HttpRequestRegexMatcher as HttpRequestRegexMatcherModel, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( HttpResponseFilter as HttpResponseFilterModel, @@ -494,7 +494,7 @@ APIBudget, FixedWindowCallRatePolicy, HttpAPIBudget, - HttpRequestMatcher, + HttpRequestRegexMatcher, MovingWindowCallRatePolicy, Rate, UnlimitedCallRatePolicy, @@ -644,7 +644,7 @@ def _init_mappings(self) -> None: MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy, UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy, RateModel: self.create_rate, - HttpRequestMatcherModel: self.create_http_request_matcher, + HttpRequestRegexMatcherModel: self.create_http_request_matcher, } # Needed for the case where we need to perform a second parse on the fields of a custom component @@ -3040,9 +3040,9 @@ def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: ) def create_http_request_matcher( - self, model: HttpRequestMatcherModel, config: Config, **kwargs: Any - ) -> HttpRequestMatcher: - return HttpRequestMatcher( + self, model: 
HttpRequestRegexMatcherModel, config: Config, **kwargs: Any + ) -> HttpRequestRegexMatcher: + return HttpRequestRegexMatcher( method=model.method, url_base=model.url_base, url_path_pattern=model.url_path_pattern, diff --git a/airbyte_cdk/sources/streams/call_rate.py b/airbyte_cdk/sources/streams/call_rate.py index d25fb9c2b..21fec881f 100644 --- a/airbyte_cdk/sources/streams/call_rate.py +++ b/airbyte_cdk/sources/streams/call_rate.py @@ -100,6 +100,69 @@ def __call__(self, request: Any) -> bool: class HttpRequestMatcher(RequestMatcher): + """Simple implementation of RequestMatcher for http requests case""" + + def __init__( + self, + method: Optional[str] = None, + url: Optional[str] = None, + params: Optional[Mapping[str, Any]] = None, + headers: Optional[Mapping[str, Any]] = None, + ): + """Constructor + + :param method: + :param url: + :param params: + :param headers: + """ + self._method = method + self._url = url + self._params = {str(k): str(v) for k, v in (params or {}).items()} + self._headers = {str(k): str(v) for k, v in (headers or {}).items()} + + @staticmethod + def _match_dict(obj: Mapping[str, Any], pattern: Mapping[str, Any]) -> bool: + """Check that all elements from pattern dict present and have the same values in obj dict + + :param obj: + :param pattern: + :return: + """ + return pattern.items() <= obj.items() + + def __call__(self, request: Any) -> bool: + """ + + :param request: + :return: True if matches the provided request object, False - otherwise + """ + if isinstance(request, requests.Request): + prepared_request = request.prepare() + elif isinstance(request, requests.PreparedRequest): + prepared_request = request + else: + return False + + if self._method is not None: + if prepared_request.method != self._method: + return False + if self._url is not None and prepared_request.url is not None: + url_without_params = prepared_request.url.split("?")[0] + if url_without_params != self._url: + return False + if self._params is not None: + parsed_url = parse.urlsplit(prepared_request.url) + params = dict(parse.parse_qsl(str(parsed_url.query))) + if not self._match_dict(params, self._params): + return False + if self._headers is not None: + if not self._match_dict(prepared_request.headers, self._headers): + return False + return True + + +class HttpRequestRegexMatcher(RequestMatcher): """ Extended RequestMatcher for HTTP requests that supports matching on: - HTTP method (case-insensitive) diff --git a/unit_tests/sources/streams/test_call_rate.py b/unit_tests/sources/streams/test_call_rate.py index 16bce68e3..853e2997e 100644 --- a/unit_tests/sources/streams/test_call_rate.py +++ b/unit_tests/sources/streams/test_call_rate.py @@ -17,6 +17,7 @@ CallRateLimitHit, FixedWindowCallRatePolicy, HttpRequestMatcher, + HttpRequestRegexMatcher, MovingWindowCallRatePolicy, Rate, UnlimitedCallRatePolicy, @@ -357,3 +358,90 @@ def test_with_cache(self, mocker, requests_mock): assert next(records) == {"data": "some_data"} assert MovingWindowCallRatePolicy.try_acquire.call_count == 1 + + +class TestHttpRequestRegexMatcher: + """ + Tests for the new regex-based logic: + - Case-insensitive HTTP method matching + - Optional url_base (scheme://netloc) + - Regex-based path matching + - Query params (must be present) + - Headers (case-insensitive keys) + """ + + def test_case_insensitive_method(self): + matcher = HttpRequestRegexMatcher(method="GET") + + req_ok = Request("get", "https://example.com/test/path") + req_wrong = Request("POST", "https://example.com/test/path") + + assert 
matcher(req_ok) + assert not matcher(req_wrong) + + def test_url_base(self): + matcher = HttpRequestRegexMatcher(url_base="https://example.com") + + req_ok = Request("GET", "https://example.com/test/path?foo=bar") + req_wrong = Request("GET", "https://another.com/test/path?foo=bar") + + assert matcher(req_ok) + assert not matcher(req_wrong) + + def test_url_path_pattern(self): + matcher = HttpRequestRegexMatcher(url_path_pattern=r"/test/") + + req_ok = Request("GET", "https://example.com/test/something") + req_wrong = Request("GET", "https://example.com/other/something") + + assert matcher(req_ok) + assert not matcher(req_wrong) + + def test_query_params(self): + matcher = HttpRequestRegexMatcher(params={"foo": "bar"}) + + req_ok = Request("GET", "https://example.com/api?foo=bar&extra=123") + req_missing = Request("GET", "https://example.com/api?not_foo=bar") + + assert matcher(req_ok) + assert not matcher(req_missing) + + def test_headers_case_insensitive(self): + matcher = HttpRequestRegexMatcher(headers={"X-Custom-Header": "abc"}) + + req_ok = Request( + "GET", + "https://example.com/api?foo=bar", + headers={"x-custom-header": "abc", "other": "123"}, + ) + req_wrong = Request("GET", "https://example.com/api", headers={"x-custom-header": "wrong"}) + + assert matcher(req_ok) + assert not matcher(req_wrong) + + def test_combined_criteria(self): + matcher = HttpRequestRegexMatcher( + method="GET", + url_base="https://example.com", + url_path_pattern=r"/test/", + params={"foo": "bar"}, + headers={"X-Test": "123"}, + ) + + req_ok = Request("GET", "https://example.com/test/me?foo=bar", headers={"x-test": "123"}) + req_bad_base = Request( + "GET", "https://other.com/test/me?foo=bar", headers={"x-test": "123"} + ) + req_bad_path = Request("GET", "https://example.com/nope?foo=bar", headers={"x-test": "123"}) + req_bad_param = Request( + "GET", "https://example.com/test/me?extra=xyz", headers={"x-test": "123"} + ) + req_bad_header = Request( + "GET", "https://example.com/test/me?foo=bar", headers={"some-other-header": "xyz"} + ) + + assert matcher(req_ok) + assert not matcher(req_bad_base) + assert not matcher(req_bad_path) + assert not matcher(req_bad_param) + assert not matcher(req_bad_header) From 1285668eecf394e90d373490c561d506f808d73d Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Sun, 9 Feb 2025 22:26:53 +0200 Subject: [PATCH 05/26] Add unit tests --- .../test_model_to_component_factory.py | 80 +++++++++++++++++++ .../requesters/test_http_requester.py | 32 ++++++++ 2 files changed, 112 insertions(+) diff --git a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index 43564a5c8..769bc52a0 100644 --- a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -142,6 +142,7 @@ from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource +from airbyte_cdk.sources.streams.call_rate import MovingWindowCallRatePolicy from airbyte_cdk.sources.streams.concurrent.clamping import ( ClampingEndProvider, DayClampingStrategy, @@ -3564,3 +3565,82 @@ def test_create_async_retriever(): assert isinstance(selector, RecordSelector) assert isinstance(extractor, DpathExtractor) assert extractor.field_path == 
["data"] + + +def test_api_budget(): + manifest = { + "type": "DeclarativeSource", + "api_budget": { + "type": "HTTPAPIBudget", + "ratelimit_reset_header": "X-RateLimit-Reset", + "ratelimit_remaining_header": "X-RateLimit-Remaining", + "status_codes_for_ratelimit_hit": [429, 503], + "policies": [ + { + "type": "MovingWindowCallRatePolicy", + "rates": [ + { + "type": "Rate", + "limit": 3, + "interval": "PT0.1S", # 0.1 seconds + } + ], + "matchers": [ + { + "type": "HttpRequestRegexMatcher", + "method": "GET", + "url_base": "https://api.sendgrid.com", + "url_path_pattern": "/v3/marketing/lists", + } + ], + } + ], + }, + "my_requester": { + "type": "HttpRequester", + "path": "/v3/marketing/lists", + "url_base": "https://api.sendgrid.com", + "http_method": "GET", + "authenticator": { + "type": "BasicHttpAuthenticator", + "username": "admin", + "password": "{{ config['password'] }}", + }, + }, + } + + config = { + "password": "verysecrettoken", + } + + factory = ModelToComponentFactory() + if "api_budget" in manifest: + factory.set_api_budget(manifest["api_budget"], config) + + from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + HttpRequester as HttpRequesterModel, + ) + + requester_definition = manifest["my_requester"] + assert requester_definition["type"] == "HttpRequester" + + http_requester = factory.create_component( + model_type=HttpRequesterModel, + component_definition=requester_definition, + config=config, + name="lists_stream", + decoder=None, + ) + + assert http_requester.api_budget is not None + assert http_requester.api_budget.ratelimit_reset_header == "X-RateLimit-Reset" + assert http_requester.api_budget.status_codes_for_ratelimit_hit == [429, 503] + assert len(http_requester.api_budget.policies) == 1 + + # The single policy is a MovingWindowCallRatePolicy + policy = http_requester.api_budget.policies[0] + assert isinstance(policy, MovingWindowCallRatePolicy) + assert policy._bucket.rates[0].limit == 3 + # The 0.1s from 'PT0.1S' is stored in ms by PyRateLimiter internally + # but here just check that the limit and interval exist + assert policy._bucket.rates[0].interval == 100 # 100 ms diff --git a/unit_tests/sources/declarative/requesters/test_http_requester.py b/unit_tests/sources/declarative/requesters/test_http_requester.py index f02ec206b..c5d5c218d 100644 --- a/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -2,6 +2,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from datetime import timedelta from typing import Any, Mapping, Optional from unittest import mock from unittest.mock import MagicMock @@ -9,6 +10,7 @@ import pytest as pytest import requests +import requests.sessions from requests import PreparedRequest from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator @@ -27,6 +29,12 @@ InterpolatedRequestOptionsProvider, ) from airbyte_cdk.sources.message import MessageRepository +from airbyte_cdk.sources.streams.call_rate import ( + AbstractAPIBudget, + HttpAPIBudget, + MovingWindowCallRatePolicy, + Rate, +) from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction from airbyte_cdk.sources.streams.http.exceptions import ( RequestBodyException, @@ -45,6 +53,7 @@ def factory( request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None, authenticator: Optional[DeclarativeAuthenticator] = None, error_handler: Optional[ErrorHandler] = None, + api_budget: Optional[HttpAPIBudget] = None, config: Optional[Config] = None, parameters: Mapping[str, Any] = None, disable_retries: bool = False, @@ -61,6 +70,7 @@ def factory( http_method=http_method, request_options_provider=request_options_provider, error_handler=error_handler, + api_budget=api_budget, disable_retries=disable_retries, message_repository=message_repository or MagicMock(), use_cache=use_cache, @@ -934,3 +944,25 @@ def test_backoff_strategy_from_manifest_is_respected(http_requester_factory: Any http_requester._http_client._request_attempt_count.get(request_mock) == http_requester._http_client._max_retries + 1 ) + + +def test_http_requester_with_mock_apibudget(http_requester_factory, monkeypatch): + mock_budget = MagicMock(spec=HttpAPIBudget) + + requester = http_requester_factory( + url_base="https://example.com", + path="test", + api_budget=mock_budget, + ) + + dummy_response = requests.Response() + dummy_response.status_code = 200 + send_mock = MagicMock(return_value=dummy_response) + monkeypatch.setattr(requests.Session, "send", send_mock) + + response = requester.send_request() + + assert send_mock.call_count == 1 + assert response.status_code == 200 + + assert mock_budget.acquire_call.call_count == 1 From 7be98423518c975e672629abbd4cb063048e55d2 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Sun, 9 Feb 2025 22:38:57 +0200 Subject: [PATCH 06/26] Add FixedWindowCallRatePolicy unit test --- .../parsers/model_to_component_factory.py | 2 +- .../test_model_to_component_factory.py | 79 +++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 9bd775a4a..b55d40fcd 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -3001,7 +3001,7 @@ def create_fixed_window_call_rate_policy( ] return FixedWindowCallRatePolicy( next_reset_ts=model.next_reset_ts, - period=parse_duration(model.period), + period=model.period, call_limit=model.call_limit, matchers=matchers, ) diff --git a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index 769bc52a0..bc72ea36b 100644 --- a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -3644,3 
+3644,82 @@ def test_api_budget(): # The 0.1s from 'PT0.1S' is stored in ms by PyRateLimiter internally # but here just check that the limit and interval exist assert policy._bucket.rates[0].interval == 100 # 100 ms + + +def test_api_budget_fixed_window_policy(): + manifest = { + "type": "DeclarativeSource", + # Root-level api_budget referencing a FixedWindowCallRatePolicy + "api_budget": { + "type": "APIBudget", + "maximum_attempts_to_acquire": 9999, + "policies": [ + { + "type": "FixedWindowCallRatePolicy", + "next_reset_ts": "2025-01-01T00:00:00Z", + "period": "PT1M", # 1 minute + "call_limit": 10, + "matchers": [ + { + "type": "HttpRequestRegexMatcher", + "method": "GET", + "url_base": "https://example.org", + "url_path_pattern": "/v2/data", + } + ], + } + ], + }, + # We'll define a single HttpRequester that references that base + "my_requester": { + "type": "HttpRequester", + "path": "/v2/data", + "url_base": "https://example.org", + "http_method": "GET", + "authenticator": {"type": "NoAuth"}, + }, + } + + config = {} + + factory = ModelToComponentFactory() + if "api_budget" in manifest: + factory.set_api_budget(manifest["api_budget"], config) + + from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + HttpRequester as HttpRequesterModel, + ) + + requester_definition = manifest["my_requester"] + assert requester_definition["type"] == "HttpRequester" + http_requester = factory.create_component( + model_type=HttpRequesterModel, + component_definition=requester_definition, + config=config, + name="my_stream", + decoder=None, + ) + + assert http_requester.api_budget is not None + assert http_requester.api_budget.maximum_attempts_to_acquire == 9999 + assert len(http_requester.api_budget.policies) == 1 + + from airbyte_cdk.sources.streams.call_rate import FixedWindowCallRatePolicy + + policy = http_requester.api_budget.policies[0] + assert isinstance(policy, FixedWindowCallRatePolicy) + assert policy._call_limit == 10 + # The period is "PT1M" => 60 seconds + assert policy._offset.total_seconds() == 60 + + expected_reset_dt = datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + assert policy._next_reset_ts == expected_reset_dt + + assert len(policy._matchers) == 1 + matcher = policy._matchers[0] + from airbyte_cdk.sources.streams.call_rate import HttpRequestRegexMatcher + + assert isinstance(matcher, HttpRequestRegexMatcher) + assert matcher._method == "GET" + assert matcher._url_base == "https://example.org" + assert matcher._url_path_pattern.pattern == "/v2/data" From 8d3bfce9fef2442d46eab37cbe6e5d5f275c7ec5 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 10 Feb 2025 11:24:38 +0200 Subject: [PATCH 07/26] Change the partitions limit to 1000 --- .../declarative/incremental/concurrent_partition_cursor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index ab667c655..fd803df49 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -58,7 +58,7 @@ class ConcurrentPerPartitionCursor(Cursor): CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}. 
""" - DEFAULT_MAX_PARTITIONS_NUMBER = 10000 + DEFAULT_MAX_PARTITIONS_NUMBER = 1000 _NO_STATE: Mapping[str, Any] = {} _NO_CURSOR_STATE: Mapping[str, Any] = {} _GLOBAL_STATE_KEY = "state" From 509ea05575c146587d2d0c0970e09a886fee3a35 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 10 Feb 2025 17:31:53 +0200 Subject: [PATCH 08/26] Refactored switching logic --- .../incremental/concurrent_partition_cursor.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index fd803df49..f54a0297f 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -58,7 +58,8 @@ class ConcurrentPerPartitionCursor(Cursor): CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}. """ - DEFAULT_MAX_PARTITIONS_NUMBER = 1000 + DEFAULT_MAX_PARTITIONS_NUMBER = 10_000 + SWITCH_TO_GLOBAL_LIMIT = 1000 _NO_STATE: Mapping[str, Any] = {} _NO_CURSOR_STATE: Mapping[str, Any] = {} _GLOBAL_STATE_KEY = "state" @@ -99,7 +100,7 @@ def __init__( self._new_global_cursor: Optional[StreamState] = None self._lookback_window: int = 0 self._parent_state: Optional[StreamState] = None - self._over_limit: int = 0 + self._number_of_partitions: int = 0 self._use_global_cursor: bool = False self._partition_serializer = PerPartitionKeySerializer() @@ -233,8 +234,8 @@ def _ensure_partition_limit(self) -> None: or removed due to being the oldest. """ with self._lock: + self._number_of_partitions += 1 while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1: - self._over_limit += 1 # Try removing finished partitions first for partition_key in list(self._cursor_per_partition.keys()): if ( @@ -245,7 +246,7 @@ def _ensure_partition_limit(self) -> None: partition_key ) # Remove the oldest partition logger.warning( - f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}." + f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions}." ) break else: @@ -254,7 +255,7 @@ def _ensure_partition_limit(self) -> None: 1 ] # Remove the oldest partition logger.warning( - f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}." + f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions}." ) def _set_initial_state(self, stream_state: StreamState) -> None: @@ -355,6 +356,10 @@ def _set_global_state(self, stream_state: Mapping[str, Any]) -> None: def observe(self, record: Record) -> None: if not self._use_global_cursor and self.limit_reached(): + logger.info( + f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. " + f"Switching to global cursor for {self._stream_name}." 
+ ) self._use_global_cursor = True if not record.associated_slice: @@ -397,4 +402,4 @@ def _get_cursor(self, record: Record) -> ConcurrentCursor: return cursor def limit_reached(self) -> bool: - return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER + return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT From 8d44150ce61cb38aaf4e9ce30183ef43f3a7a0fd Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 10 Feb 2025 20:53:06 +0200 Subject: [PATCH 09/26] Increase the limit for number of partitions in memory --- .../declarative/incremental/concurrent_partition_cursor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index f54a0297f..d69b61bfd 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -58,7 +58,7 @@ class ConcurrentPerPartitionCursor(Cursor): CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}. """ - DEFAULT_MAX_PARTITIONS_NUMBER = 10_000 + DEFAULT_MAX_PARTITIONS_NUMBER = 25_000 SWITCH_TO_GLOBAL_LIMIT = 1000 _NO_STATE: Mapping[str, Any] = {} _NO_CURSOR_STATE: Mapping[str, Any] = {} From 342375c5fc1017a8738fbc0a7166695f24388801 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Wed, 12 Feb 2025 15:42:21 +0200 Subject: [PATCH 10/26] Refactor ConcurrentPerPartitionCursor to not use ConcurrentCursor with `_use_global_cursor` --- .../declarative_component_schema.yaml | 207 ------------------ .../concurrent_partition_cursor.py | 29 ++- .../manifest_declarative_source.py | 4 - .../models/declarative_component_schema.py | 165 -------------- .../parsers/model_to_component_factory.py | 141 ------------ .../declarative/requesters/http_requester.py | 3 - airbyte_cdk/sources/streams/call_rate.py | 156 ++++--------- 7 files changed, 59 insertions(+), 646 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 25c9492fb..b0242c94f 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -40,12 +40,6 @@ properties: "$ref": "#/definitions/Spec" concurrency_level: "$ref": "#/definitions/ConcurrencyLevel" - api_budget: - title: API Budget - description: Defines how many requests can be made to the API in a given time frame. This field accepts either a generic APIBudget or an HTTP-specific configuration (HTTPAPIBudget) to be applied across all streams. - anyOf: - - "$ref": "#/definitions/APIBudget" - - "$ref": "#/definitions/HTTPAPIBudget" metadata: type: object description: For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata. @@ -1371,207 +1365,6 @@ definitions: $parameters: type: object additional_properties: true - APIBudget: - title: API Budget - description: > - A generic API budget configuration that defines the policies (rate limiting rules) - and the maximum number of attempts to acquire a call credit. This budget does not automatically - update itself based on HTTP response headers. 
- type: object - required: - - type - - policies - properties: - type: - type: string - enum: [APIBudget] - policies: - title: Policies - description: List of call rate policies that define how many calls are allowed. - type: array - items: - anyOf: - - "$ref": "#/definitions/FixedWindowCallRatePolicy" - - "$ref": "#/definitions/MovingWindowCallRatePolicy" - - "$ref": "#/definitions/UnlimitedCallRatePolicy" - maximum_attempts_to_acquire: - title: Maximum Attempts to Acquire - description: The maximum number of attempts to acquire a call before giving up. - type: integer - default: 100000 - additionalProperties: true - HTTPAPIBudget: - title: HTTP API Budget - description: > - An HTTP-specific API budget that extends APIBudget by updating rate limiting information based - on HTTP response headers. It extracts available calls and the next reset timestamp from the HTTP responses. - type: object - required: - - type - - policies - properties: - type: - type: string - enum: [HTTPAPIBudget] - policies: - title: Policies - description: List of call rate policies that define how many calls are allowed. - type: array - items: - anyOf: - - "$ref": "#/definitions/FixedWindowCallRatePolicy" - - "$ref": "#/definitions/MovingWindowCallRatePolicy" - - "$ref": "#/definitions/UnlimitedCallRatePolicy" - ratelimit_reset_header: - title: Rate Limit Reset Header - description: The HTTP response header name that indicates when the rate limit resets. - type: string - default: "ratelimit-reset" - ratelimit_remaining_header: - title: Rate Limit Remaining Header - description: The HTTP response header name that indicates the number of remaining allowed calls. - type: string - default: "ratelimit-remaining" - status_codes_for_ratelimit_hit: - title: Status Codes for Rate Limit Hit - description: List of HTTP status codes that indicate a rate limit has been hit. - type: array - items: - type: integer - default: [429] - maximum_attempts_to_acquire: - title: Maximum Attempts to Acquire - description: The maximum number of attempts to acquire a call before giving up. - type: integer - default: 100000 - additionalProperties: true - FixedWindowCallRatePolicy: - title: Fixed Window Call Rate Policy - description: A policy that allows a fixed number of calls within a specific time window. - type: object - required: - - type - - next_reset_ts - - period - - call_limit - - matchers - properties: - type: - type: string - enum: [FixedWindowCallRatePolicy] - next_reset_ts: - title: Next Reset Timestamp - description: The timestamp when the rate limit will reset. - type: string - format: date-time - period: - title: Period - description: The time interval for the rate limit window. - type: string - format: duration - call_limit: - title: Call Limit - description: The maximum number of calls allowed within the period. - type: integer - matchers: - title: Matchers - description: List of matchers that define which requests this policy applies to. - type: array - items: - "$ref": "#/definitions/HttpRequestRegexMatcher" - additionalProperties: true - MovingWindowCallRatePolicy: - title: Moving Window Call Rate Policy - description: A policy that allows a fixed number of calls within a moving time window. - type: object - required: - - type - - rates - - matchers - properties: - type: - type: string - enum: [MovingWindowCallRatePolicy] - rates: - title: Rates - description: List of rates that define the call limits for different time intervals. 
- type: array - items: - "$ref": "#/definitions/Rate" - matchers: - title: Matchers - description: List of matchers that define which requests this policy applies to. - type: array - items: - "$ref": "#/definitions/HttpRequestRegexMatcher" - additionalProperties: true - UnlimitedCallRatePolicy: - title: Unlimited Call Rate Policy - description: A policy that allows unlimited calls for specific requests. - type: object - required: - - type - - matchers - properties: - type: - type: string - enum: [UnlimitedCallRatePolicy] - matchers: - title: Matchers - description: List of matchers that define which requests this policy applies to. - type: array - items: - "$ref": "#/definitions/HttpRequestRegexMatcher" - additionalProperties: true - Rate: - title: Rate - description: Defines a rate limit with a specific number of calls allowed within a time interval. - type: object - required: - - limit - - interval - properties: - limit: - title: Limit - description: The maximum number of calls allowed within the interval. - type: integer - interval: - title: Interval - description: The time interval for the rate limit. - type: string - format: duration - additionalProperties: true - HttpRequestRegexMatcher: - title: HTTP Request Matcher - description: > - Matches HTTP requests based on method, base URL, URL path pattern, query parameters, and headers. - Use `url_base` to specify the scheme and host (without trailing slash) and - `url_path_pattern` to apply a regex to the request path. - type: object - properties: - method: - title: Method - description: The HTTP method to match (e.g., GET, POST). - type: string - url_base: - title: URL Base - description: The base URL (scheme and host, e.g. "https://api.example.com") to match. - type: string - url_path_pattern: - title: URL Path Pattern - description: A regular expression pattern to match the URL path. - type: string - params: - title: Parameters - description: The query parameters to match. - type: object - additionalProperties: true - headers: - title: Headers - description: The headers to match. - type: object - additionalProperties: true - additionalProperties: true DefaultErrorHandler: title: Default Error Handler description: Component defining how to handle errors. Default behavior includes only retrying server errors (HTTP 5XX) and too many requests (HTTP 429) with an exponential backoff. 
diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index d69b61bfd..fc75ecd90 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -142,7 +142,8 @@ def close_partition(self, partition: Partition) -> None: raise ValueError("stream_slice cannot be None") partition_key = self._to_partition_key(stream_slice.partition) - self._cursor_per_partition[partition_key].close_partition(partition=partition) + if not self._use_global_cursor: + self._cursor_per_partition[partition_key].close_partition(partition=partition) with self._lock: self._semaphore_per_partition[partition_key].acquire() cursor = self._cursor_per_partition[partition_key] @@ -150,12 +151,7 @@ def close_partition(self, partition: Partition) -> None: partition_key in self._finished_partitions and self._semaphore_per_partition[partition_key]._value == 0 ): - if ( - self._new_global_cursor is None - or self._new_global_cursor[self.cursor_field.cursor_field_key] - < cursor.state[self.cursor_field.cursor_field_key] - ): - self._new_global_cursor = copy.deepcopy(cursor.state) + self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key]) if not self._use_global_cursor: self._emit_state_message() @@ -366,9 +362,22 @@ def observe(self, record: Record) -> None: raise ValueError( "Invalid state as stream slices that are emitted should refer to an existing cursor" ) - self._cursor_per_partition[ - self._to_partition_key(record.associated_slice.partition) - ].observe(record) + + record_cursor = self._connector_state_converter.parse_value( + self._cursor_field.extract_value(record) + ) + self._update_global_cursor(record_cursor) + if not self._use_global_cursor: + self._cursor_per_partition[ + self._to_partition_key(record.associated_slice.partition) + ].observe(record) + + def _update_global_cursor(self, value: Mapping[str, Any]) -> None: + if ( + self._new_global_cursor is None + or self._new_global_cursor[self.cursor_field.cursor_field_key] < value + ): + self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)} def _to_partition_key(self, partition: Mapping[str, Any]) -> str: return self._partition_serializer.to_partition_key(partition) diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index d3afb1396..efc779464 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -137,10 +137,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: self._source_config, config ) - api_budget_model = self._source_config.get("api_budget") - if api_budget_model: - self._constructor.set_api_budget(api_budget_model, config) - source_streams = [ self._constructor.create_component( DeclarativeStreamModel, diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index aaff67548..fe29cee2c 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -3,7 +3,6 @@ from __future__ import annotations -from datetime import datetime, timedelta from enum import Enum from typing import Any, Dict, List, Literal, 
Optional, Union @@ -643,45 +642,6 @@ class OAuthAuthenticator(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") -class Rate(BaseModel): - class Config: - extra = Extra.allow - - limit: int = Field( - ..., - description="The maximum number of calls allowed within the interval.", - title="Limit", - ) - interval: timedelta = Field( - ..., description="The time interval for the rate limit.", title="Interval" - ) - - -class HttpRequestRegexMatcher(BaseModel): - class Config: - extra = Extra.allow - - method: Optional[str] = Field( - None, description="The HTTP method to match (e.g., GET, POST).", title="Method" - ) - url_base: Optional[str] = Field( - None, - description='The base URL (scheme and host, e.g. "https://api.example.com") to match.', - title="URL Base", - ) - url_path_pattern: Optional[str] = Field( - None, - description="A regular expression pattern to match the URL path.", - title="URL Path Pattern", - ) - params: Optional[Dict[str, Any]] = Field( - None, description="The query parameters to match.", title="Parameters" - ) - headers: Optional[Dict[str, Any]] = Field( - None, description="The headers to match.", title="Headers" - ) - - class DpathExtractor(BaseModel): type: Literal["DpathExtractor"] field_path: List[str] = Field( @@ -1624,60 +1584,6 @@ class DatetimeBasedCursor(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") -class FixedWindowCallRatePolicy(BaseModel): - class Config: - extra = Extra.allow - - type: Literal["FixedWindowCallRatePolicy"] - next_reset_ts: datetime = Field( - ..., - description="The timestamp when the rate limit will reset.", - title="Next Reset Timestamp", - ) - period: timedelta = Field( - ..., description="The time interval for the rate limit window.", title="Period" - ) - call_limit: int = Field( - ..., - description="The maximum number of calls allowed within the period.", - title="Call Limit", - ) - matchers: List[HttpRequestRegexMatcher] = Field( - ..., - description="List of matchers that define which requests this policy applies to.", - title="Matchers", - ) - - -class MovingWindowCallRatePolicy(BaseModel): - class Config: - extra = Extra.allow - - type: Literal["MovingWindowCallRatePolicy"] - rates: List[Rate] = Field( - ..., - description="List of rates that define the call limits for different time intervals.", - title="Rates", - ) - matchers: List[HttpRequestRegexMatcher] = Field( - ..., - description="List of matchers that define which requests this policy applies to.", - title="Matchers", - ) - - -class UnlimitedCallRatePolicy(BaseModel): - class Config: - extra = Extra.allow - - type: Literal["UnlimitedCallRatePolicy"] - matchers: List[HttpRequestRegexMatcher] = Field( - ..., - description="List of matchers that define which requests this policy applies to.", - title="Matchers", - ) - - class DefaultErrorHandler(BaseModel): type: Literal["DefaultErrorHandler"] backoff_strategies: Optional[ @@ -1809,67 +1715,6 @@ class CompositeErrorHandler(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") -class APIBudget(BaseModel): - class Config: - extra = Extra.allow - - type: Literal["APIBudget"] - policies: List[ - Union[ - FixedWindowCallRatePolicy, - MovingWindowCallRatePolicy, - UnlimitedCallRatePolicy, - ] - ] = Field( - ..., - description="List of call rate policies that define how many calls are allowed.", - title="Policies", - ) - maximum_attempts_to_acquire: Optional[int] = Field( - 100000, - description="The maximum number of attempts 
to acquire a call before giving up.", - title="Maximum Attempts to Acquire", - ) - - -class HTTPAPIBudget(BaseModel): - class Config: - extra = Extra.allow - - type: Literal["HTTPAPIBudget"] - policies: List[ - Union[ - FixedWindowCallRatePolicy, - MovingWindowCallRatePolicy, - UnlimitedCallRatePolicy, - ] - ] = Field( - ..., - description="List of call rate policies that define how many calls are allowed.", - title="Policies", - ) - ratelimit_reset_header: Optional[str] = Field( - "ratelimit-reset", - description="The HTTP response header name that indicates when the rate limit resets.", - title="Rate Limit Reset Header", - ) - ratelimit_remaining_header: Optional[str] = Field( - "ratelimit-remaining", - description="The HTTP response header name that indicates the number of remaining allowed calls.", - title="Rate Limit Remaining Header", - ) - status_codes_for_ratelimit_hit: Optional[List[int]] = Field( - [429], - description="List of HTTP status codes that indicate a rate limit has been hit.", - title="Status Codes for Rate Limit Hit", - ) - maximum_attempts_to_acquire: Optional[int] = Field( - 100000, - description="The maximum number of attempts to acquire a call before giving up.", - title="Maximum Attempts to Acquire", - ) - - class ZipfileDecoder(BaseModel): class Config: extra = Extra.allow @@ -1903,11 +1748,6 @@ class Config: definitions: Optional[Dict[str, Any]] = None spec: Optional[Spec] = None concurrency_level: Optional[ConcurrencyLevel] = None - api_budget: Optional[Union[APIBudget, HTTPAPIBudget]] = Field( - None, - description="Defines how many requests can be made to the API in a given time frame. This field accepts either a generic APIBudget or an HTTP-specific configuration (HTTPAPIBudget) to be applied across all streams.", - title="API Budget", - ) metadata: Optional[Dict[str, Any]] = Field( None, description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", @@ -1934,11 +1774,6 @@ class Config: definitions: Optional[Dict[str, Any]] = None spec: Optional[Spec] = None concurrency_level: Optional[ConcurrencyLevel] = None - api_budget: Optional[Union[APIBudget, HTTPAPIBudget]] = Field( - None, - description="Defines how many requests can be made to the API in a given time frame. This field accepts either a generic APIBudget or an HTTP-specific configuration (HTTPAPIBudget) to be applied across all streams.", - title="API Budget", - ) metadata: Optional[Dict[str, Any]] = Field( None, description="For internal Airbyte use only - DO NOT modify manually. 
Used by consumers of declarative manifests for storing related metadata.", diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 4b80e851b..c6d69623d 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -112,9 +112,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( AddFields as AddFieldsModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - APIBudget as APIBudgetModel, -) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( ApiKeyAuthenticator as ApiKeyAuthenticatorModel, ) @@ -229,9 +226,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( ExponentialBackoffStrategy as ExponentialBackoffStrategyModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel, -) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( FlattenFields as FlattenFieldsModel, ) @@ -241,18 +235,12 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( GzipParser as GzipParserModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - HTTPAPIBudget as HTTPAPIBudgetModel, -) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( HttpComponentsResolver as HttpComponentsResolverModel, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( HttpRequester as HttpRequesterModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - HttpRequestRegexMatcher as HttpRequestRegexMatcherModel, -) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( HttpResponseFilter as HttpResponseFilterModel, ) @@ -307,9 +295,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( MinMaxDatetime as MinMaxDatetimeModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel, -) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( NoAuth as NoAuthModel, ) @@ -328,9 +313,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( ParentStreamConfig as ParentStreamConfigModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - Rate as RateModel, -) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( RecordFilter as RecordFilterModel, ) @@ -374,9 +356,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( TypesMap as TypesMapModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel, -) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( WaitTimeFromHeader as WaitTimeFromHeaderModel, @@ -490,15 +469,6 @@ MessageRepository, NoopMessageRepository, ) -from airbyte_cdk.sources.streams.call_rate import ( - APIBudget, - FixedWindowCallRatePolicy, - HttpAPIBudget, - HttpRequestRegexMatcher, - MovingWindowCallRatePolicy, - Rate, - 
UnlimitedCallRatePolicy, -) from airbyte_cdk.sources.streams.concurrent.clamping import ( ClampingEndProvider, ClampingStrategy, @@ -550,7 +520,6 @@ def __init__( self._evaluate_log_level(emit_connector_builder_messages) ) self._connector_state_manager = connector_state_manager or ConnectorStateManager() - self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None def _init_mappings(self) -> None: self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = { @@ -638,13 +607,6 @@ def _init_mappings(self) -> None: StreamConfigModel: self.create_stream_config, ComponentMappingDefinitionModel: self.create_components_mapping_definition, ZipfileDecoderModel: self.create_zipfile_decoder, - APIBudgetModel: self.create_api_budget, - HTTPAPIBudgetModel: self.create_http_api_budget, - FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy, - MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy, - UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy, - RateModel: self.create_rate, - HttpRequestRegexMatcherModel: self.create_http_request_matcher, } # Needed for the case where we need to perform a second parse on the fields of a custom component @@ -1957,8 +1919,6 @@ def create_http_requester( ) ) - api_budget = self._api_budget - request_options_provider = InterpolatedRequestOptionsProvider( request_body_data=model.request_body_data, request_body_json=model.request_body_json, @@ -1979,7 +1939,6 @@ def create_http_requester( path=model.path, authenticator=authenticator, error_handler=error_handler, - api_budget=api_budget, http_method=HttpMethod[model.http_method.value], request_options_provider=request_options_provider, config=config, @@ -2981,103 +2940,3 @@ def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: return isinstance(parser.inner_parser, JsonParser) else: return False - - def create_api_budget(self, model: APIBudgetModel, config: Config, **kwargs: Any) -> APIBudget: - policies = [ - self._create_component_from_model(model=policy, config=config) - for policy in model.policies - ] - - return APIBudget( - policies=policies, - maximum_attempts_to_acquire=model.maximum_attempts_to_acquire or 100000, - ) - - def create_http_api_budget( - self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any - ) -> HttpAPIBudget: - policies = [ - self._create_component_from_model(model=policy, config=config) - for policy in model.policies - ] - - return HttpAPIBudget( - policies=policies, - maximum_attempts_to_acquire=model.maximum_attempts_to_acquire or 100000, - ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", - ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", - status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or (429,), - ) - - def create_fixed_window_call_rate_policy( - self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any - ) -> FixedWindowCallRatePolicy: - matchers = [ - self._create_component_from_model(model=matcher, config=config) - for matcher in model.matchers - ] - return FixedWindowCallRatePolicy( - next_reset_ts=model.next_reset_ts, - period=model.period, - call_limit=model.call_limit, - matchers=matchers, - ) - - def create_moving_window_call_rate_policy( - self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any - ) -> MovingWindowCallRatePolicy: - rates = [ - self._create_component_from_model(model=rate, config=config) for rate in model.rates - ] - matchers = [ - 
self._create_component_from_model(model=matcher, config=config) - for matcher in model.matchers - ] - return MovingWindowCallRatePolicy( - rates=rates, - matchers=matchers, - ) - - def create_unlimited_call_rate_policy( - self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any - ) -> UnlimitedCallRatePolicy: - matchers = [ - self._create_component_from_model(model=matcher, config=config) - for matcher in model.matchers - ] - - return UnlimitedCallRatePolicy( - matchers=matchers, - ) - - def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: - return Rate( - limit=model.limit, - interval=model.interval, - ) - - def create_http_request_matcher( - self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any - ) -> HttpRequestRegexMatcher: - return HttpRequestRegexMatcher( - method=model.method, - url_base=model.url_base, - url_path_pattern=model.url_path_pattern, - params=model.params, - headers=model.headers, - ) - - def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: - model_str = component_definition.get("type") - if model_str == "APIBudget": - # Annotate model_type as a type that is a subclass of BaseModel - model_type: Union[Type[APIBudgetModel], Type[HTTPAPIBudgetModel]] = APIBudgetModel - elif model_str == "HTTPAPIBudget": - model_type = HTTPAPIBudgetModel - else: - raise ValueError(f"Unknown API Budget type: {model_str}") - - # create_component expects a type[BaseModel] and returns an instance of that model. - self._api_budget = self.create_component( - model_type=model_type, component_definition=component_definition, config=config - ) diff --git a/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte_cdk/sources/declarative/requesters/http_requester.py index b206bd688..ad23f4d06 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -22,7 +22,6 @@ ) from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository -from airbyte_cdk.sources.streams.call_rate import APIBudget from airbyte_cdk.sources.streams.http import HttpClient from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @@ -56,7 +55,6 @@ class HttpRequester(Requester): http_method: Union[str, HttpMethod] = HttpMethod.GET request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None error_handler: Optional[ErrorHandler] = None - api_budget: Optional[APIBudget] = None disable_retries: bool = False message_repository: MessageRepository = NoopMessageRepository() use_cache: bool = False @@ -93,7 +91,6 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: name=self.name, logger=self.logger, error_handler=self.error_handler, - api_budget=self.api_budget, authenticator=self._authenticator, use_cache=self.use_cache, backoff_strategy=backoff_strategies, diff --git a/airbyte_cdk/sources/streams/call_rate.py b/airbyte_cdk/sources/streams/call_rate.py index 21fec881f..81ebac78e 100644 --- a/airbyte_cdk/sources/streams/call_rate.py +++ b/airbyte_cdk/sources/streams/call_rate.py @@ -6,12 +6,10 @@ import dataclasses import datetime import logging -import re import time -from dataclasses import InitVar, dataclass, field from datetime import timedelta from threading import RLock -from typing import TYPE_CHECKING, Any, 
Mapping, Optional, Union +from typing import TYPE_CHECKING, Any, Mapping, Optional from urllib import parse import requests @@ -162,100 +160,6 @@ def __call__(self, request: Any) -> bool: return True -class HttpRequestRegexMatcher(RequestMatcher): - """ - Extended RequestMatcher for HTTP requests that supports matching on: - - HTTP method (case-insensitive) - - URL base (scheme + netloc) optionally - - URL path pattern (a regex applied to the path portion of the URL) - - Query parameters (must be present) - - Headers (header names compared case-insensitively) - """ - - def __init__( - self, - method: Optional[str] = None, - url_base: Optional[str] = None, - url_path_pattern: Optional[str] = None, - params: Optional[Mapping[str, Any]] = None, - headers: Optional[Mapping[str, Any]] = None, - ): - """ - :param method: HTTP method (e.g. "GET", "POST"); compared case-insensitively. - :param url_base: Base URL (scheme://host) that must match. - :param url_path_pattern: A regex pattern that will be applied to the path portion of the URL. - :param params: Dictionary of query parameters that must be present in the request. - :param headers: Dictionary of headers that must be present (header keys are compared case-insensitively). - """ - self._method = method.upper() if method else None - - # Normalize the url_base if provided: remove trailing slash. - self._url_base = url_base.rstrip("/") if url_base else None - - # Compile the URL path pattern if provided. - self._url_path_pattern = re.compile(url_path_pattern) if url_path_pattern else None - - # Normalize query parameters to strings. - self._params = {str(k): str(v) for k, v in (params or {}).items()} - - # Normalize header keys to lowercase. - self._headers = {str(k).lower(): str(v) for k, v in (headers or {}).items()} - - @staticmethod - def _match_dict(obj: Mapping[str, Any], pattern: Mapping[str, Any]) -> bool: - """Check that every key/value in the pattern exists in the object.""" - return pattern.items() <= obj.items() - - def __call__(self, request: Any) -> bool: - """ - :param request: A requests.Request or requests.PreparedRequest instance. - :return: True if the request matches all provided criteria; False otherwise. - """ - # Prepare the request (if needed) and extract the URL details. - if isinstance(request, requests.Request): - prepared_request = request.prepare() - elif isinstance(request, requests.PreparedRequest): - prepared_request = request - else: - return False - - # Check HTTP method. - if self._method is not None and prepared_request.method is not None: - if prepared_request.method.upper() != self._method: - return False - - # Parse the URL. - parsed_url = parse.urlsplit(prepared_request.url) - # Reconstruct the base: scheme://netloc - request_url_base = f"{str(parsed_url.scheme)}://{str(parsed_url.netloc)}" - # The path (without query parameters) - request_path = str(parsed_url.path).rstrip("/") - - # If a base URL is provided, check that it matches. - if self._url_base is not None: - if request_url_base != self._url_base: - return False - - # If a URL path pattern is provided, ensure the path matches the regex. - if self._url_path_pattern is not None: - if not self._url_path_pattern.search(request_path): - return False - - # Check query parameters. - if self._params: - query_params = dict(parse.parse_qsl(str(parsed_url.query))) - if not self._match_dict(query_params, self._params): - return False - - # Check headers (normalize keys to lower-case). 
- if self._headers: - req_headers = {k.lower(): v for k, v in prepared_request.headers.items()} - if not self._match_dict(req_headers, self._headers): - return False - - return True - - class BaseCallRatePolicy(AbstractCallRatePolicy, abc.ABC): def __init__(self, matchers: list[RequestMatcher]): self._matchers = matchers @@ -495,17 +399,24 @@ def update_from_response(self, request: Any, response: Any) -> None: """ -@dataclass class APIBudget(AbstractAPIBudget): - """ - Default APIBudget implementation. - """ + """Default APIBudget implementation""" + + def __init__( + self, policies: list[AbstractCallRatePolicy], maximum_attempts_to_acquire: int = 100000 + ) -> None: + """Constructor + + :param policies: list of policies in this budget + :param maximum_attempts_to_acquire: number of attempts before throwing hit ratelimit exception, we put some big number here + to avoid situations when many threads compete with each other for a few lots over a significant amount of time + """ - policies: list[AbstractCallRatePolicy] - maximum_attempts_to_acquire: int = 100000 + self._policies = policies + self._maximum_attempts_to_acquire = maximum_attempts_to_acquire def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]: - for policy in self.policies: + for policy in self._policies: if policy.matches(request): return policy return None @@ -526,7 +437,7 @@ def acquire_call( policy = self.get_matching_policy(request) if policy: self._do_acquire(request=request, policy=policy, block=block, timeout=timeout) - elif self.policies: + elif self._policies: logger.info("no policies matched with requests, allow call by default") def update_from_response(self, request: Any, response: Any) -> None: @@ -549,7 +460,7 @@ def _do_acquire( """ last_exception = None # sometimes we spend all budget before a second attempt, so we have few more here - for attempt in range(1, self.maximum_attempts_to_acquire): + for attempt in range(1, self._maximum_attempts_to_acquire): try: policy.try_acquire(request, weight=1) return @@ -573,18 +484,31 @@ def _do_acquire( if last_exception: logger.info( - "we used all %s attempts to acquire and failed", self.maximum_attempts_to_acquire + "we used all %s attempts to acquire and failed", self._maximum_attempts_to_acquire ) raise last_exception -@dataclass class HttpAPIBudget(APIBudget): """Implementation of AbstractAPIBudget for HTTP""" - ratelimit_reset_header: str = "ratelimit-reset" - ratelimit_remaining_header: str = "ratelimit-remaining" - status_codes_for_ratelimit_hit: Union[tuple[int], list[int]] = (429,) + def __init__( + self, + ratelimit_reset_header: str = "ratelimit-reset", + ratelimit_remaining_header: str = "ratelimit-remaining", + status_codes_for_ratelimit_hit: tuple[int] = (429,), + **kwargs: Any, + ): + """Constructor + + :param ratelimit_reset_header: name of the header that has a timestamp of the next reset of call budget + :param ratelimit_remaining_header: name of the header that has the number of calls left + :param status_codes_for_ratelimit_hit: list of HTTP status codes that signal about rate limit being hit + """ + self._ratelimit_reset_header = ratelimit_reset_header + self._ratelimit_remaining_header = ratelimit_remaining_header + self._status_codes_for_ratelimit_hit = status_codes_for_ratelimit_hit + super().__init__(**kwargs) def update_from_response(self, request: Any, response: Any) -> None: policy = self.get_matching_policy(request) @@ -599,17 +523,17 @@ def update_from_response(self, request: Any, response: Any) -> None: def 
get_reset_ts_from_response( self, response: requests.Response ) -> Optional[datetime.datetime]: - if response.headers.get(self.ratelimit_reset_header): + if response.headers.get(self._ratelimit_reset_header): return datetime.datetime.fromtimestamp( - int(response.headers[self.ratelimit_reset_header]) + int(response.headers[self._ratelimit_reset_header]) ) return None def get_calls_left_from_response(self, response: requests.Response) -> Optional[int]: - if response.headers.get(self.ratelimit_remaining_header): - return int(response.headers[self.ratelimit_remaining_header]) + if response.headers.get(self._ratelimit_remaining_header): + return int(response.headers[self._ratelimit_remaining_header]) - if response.status_code in self.status_codes_for_ratelimit_hit: + if response.status_code in self._status_codes_for_ratelimit_hit: return 0 return None From 05f4db7b6a3222af20d624439882a080c3014642 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Wed, 12 Feb 2025 15:44:45 +0200 Subject: [PATCH 11/26] Delete code from another branch --- .../test_model_to_component_factory.py | 159 ------------------ .../requesters/test_http_requester.py | 32 ---- unit_tests/sources/streams/test_call_rate.py | 88 ---------- 3 files changed, 279 deletions(-) diff --git a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index 14e3460e0..32a73f364 100644 --- a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -142,7 +142,6 @@ from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource -from airbyte_cdk.sources.streams.call_rate import MovingWindowCallRatePolicy from airbyte_cdk.sources.streams.concurrent.clamping import ( ClampingEndProvider, DayClampingStrategy, @@ -3685,161 +3684,3 @@ def test_create_async_retriever(): assert isinstance(selector, RecordSelector) assert isinstance(extractor, DpathExtractor) assert extractor.field_path == ["data"] - - -def test_api_budget(): - manifest = { - "type": "DeclarativeSource", - "api_budget": { - "type": "HTTPAPIBudget", - "ratelimit_reset_header": "X-RateLimit-Reset", - "ratelimit_remaining_header": "X-RateLimit-Remaining", - "status_codes_for_ratelimit_hit": [429, 503], - "policies": [ - { - "type": "MovingWindowCallRatePolicy", - "rates": [ - { - "type": "Rate", - "limit": 3, - "interval": "PT0.1S", # 0.1 seconds - } - ], - "matchers": [ - { - "type": "HttpRequestRegexMatcher", - "method": "GET", - "url_base": "https://api.sendgrid.com", - "url_path_pattern": "/v3/marketing/lists", - } - ], - } - ], - }, - "my_requester": { - "type": "HttpRequester", - "path": "/v3/marketing/lists", - "url_base": "https://api.sendgrid.com", - "http_method": "GET", - "authenticator": { - "type": "BasicHttpAuthenticator", - "username": "admin", - "password": "{{ config['password'] }}", - }, - }, - } - - config = { - "password": "verysecrettoken", - } - - factory = ModelToComponentFactory() - if "api_budget" in manifest: - factory.set_api_budget(manifest["api_budget"], config) - - from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - HttpRequester as HttpRequesterModel, - ) - - requester_definition = manifest["my_requester"] - assert 
requester_definition["type"] == "HttpRequester" - - http_requester = factory.create_component( - model_type=HttpRequesterModel, - component_definition=requester_definition, - config=config, - name="lists_stream", - decoder=None, - ) - - assert http_requester.api_budget is not None - assert http_requester.api_budget.ratelimit_reset_header == "X-RateLimit-Reset" - assert http_requester.api_budget.status_codes_for_ratelimit_hit == [429, 503] - assert len(http_requester.api_budget.policies) == 1 - - # The single policy is a MovingWindowCallRatePolicy - policy = http_requester.api_budget.policies[0] - assert isinstance(policy, MovingWindowCallRatePolicy) - assert policy._bucket.rates[0].limit == 3 - # The 0.1s from 'PT0.1S' is stored in ms by PyRateLimiter internally - # but here just check that the limit and interval exist - assert policy._bucket.rates[0].interval == 100 # 100 ms - - -def test_api_budget_fixed_window_policy(): - manifest = { - "type": "DeclarativeSource", - # Root-level api_budget referencing a FixedWindowCallRatePolicy - "api_budget": { - "type": "APIBudget", - "maximum_attempts_to_acquire": 9999, - "policies": [ - { - "type": "FixedWindowCallRatePolicy", - "next_reset_ts": "2025-01-01T00:00:00Z", - "period": "PT1M", # 1 minute - "call_limit": 10, - "matchers": [ - { - "type": "HttpRequestRegexMatcher", - "method": "GET", - "url_base": "https://example.org", - "url_path_pattern": "/v2/data", - } - ], - } - ], - }, - # We'll define a single HttpRequester that references that base - "my_requester": { - "type": "HttpRequester", - "path": "/v2/data", - "url_base": "https://example.org", - "http_method": "GET", - "authenticator": {"type": "NoAuth"}, - }, - } - - config = {} - - factory = ModelToComponentFactory() - if "api_budget" in manifest: - factory.set_api_budget(manifest["api_budget"], config) - - from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - HttpRequester as HttpRequesterModel, - ) - - requester_definition = manifest["my_requester"] - assert requester_definition["type"] == "HttpRequester" - http_requester = factory.create_component( - model_type=HttpRequesterModel, - component_definition=requester_definition, - config=config, - name="my_stream", - decoder=None, - ) - - assert http_requester.api_budget is not None - assert http_requester.api_budget.maximum_attempts_to_acquire == 9999 - assert len(http_requester.api_budget.policies) == 1 - - from airbyte_cdk.sources.streams.call_rate import FixedWindowCallRatePolicy - - policy = http_requester.api_budget.policies[0] - assert isinstance(policy, FixedWindowCallRatePolicy) - assert policy._call_limit == 10 - # The period is "PT1M" => 60 seconds - assert policy._offset.total_seconds() == 60 - - expected_reset_dt = datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc) - assert policy._next_reset_ts == expected_reset_dt - - assert len(policy._matchers) == 1 - matcher = policy._matchers[0] - from airbyte_cdk.sources.streams.call_rate import HttpRequestRegexMatcher - - assert isinstance(matcher, HttpRequestRegexMatcher) - assert matcher._method == "GET" - assert matcher._url_base == "https://example.org" - assert matcher._url_path_pattern.pattern == "/v2/data" diff --git a/unit_tests/sources/declarative/requesters/test_http_requester.py b/unit_tests/sources/declarative/requesters/test_http_requester.py index c5d5c218d..f02ec206b 100644 --- a/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -2,7 +2,6 @@ # 
Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from datetime import timedelta from typing import Any, Mapping, Optional from unittest import mock from unittest.mock import MagicMock @@ -10,7 +9,6 @@ import pytest as pytest import requests -import requests.sessions from requests import PreparedRequest from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator @@ -29,12 +27,6 @@ InterpolatedRequestOptionsProvider, ) from airbyte_cdk.sources.message import MessageRepository -from airbyte_cdk.sources.streams.call_rate import ( - AbstractAPIBudget, - HttpAPIBudget, - MovingWindowCallRatePolicy, - Rate, -) from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction from airbyte_cdk.sources.streams.http.exceptions import ( RequestBodyException, @@ -53,7 +45,6 @@ def factory( request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None, authenticator: Optional[DeclarativeAuthenticator] = None, error_handler: Optional[ErrorHandler] = None, - api_budget: Optional[HttpAPIBudget] = None, config: Optional[Config] = None, parameters: Mapping[str, Any] = None, disable_retries: bool = False, @@ -70,7 +61,6 @@ def factory( http_method=http_method, request_options_provider=request_options_provider, error_handler=error_handler, - api_budget=api_budget, disable_retries=disable_retries, message_repository=message_repository or MagicMock(), use_cache=use_cache, @@ -944,25 +934,3 @@ def test_backoff_strategy_from_manifest_is_respected(http_requester_factory: Any http_requester._http_client._request_attempt_count.get(request_mock) == http_requester._http_client._max_retries + 1 ) - - -def test_http_requester_with_mock_apibudget(http_requester_factory, monkeypatch): - mock_budget = MagicMock(spec=HttpAPIBudget) - - requester = http_requester_factory( - url_base="https://example.com", - path="test", - api_budget=mock_budget, - ) - - dummy_response = requests.Response() - dummy_response.status_code = 200 - send_mock = MagicMock(return_value=dummy_response) - monkeypatch.setattr(requests.Session, "send", send_mock) - - response = requester.send_request() - - assert send_mock.call_count == 1 - assert response.status_code == 200 - - assert mock_budget.acquire_call.call_count == 1 diff --git a/unit_tests/sources/streams/test_call_rate.py b/unit_tests/sources/streams/test_call_rate.py index 853e2997e..16bce68e3 100644 --- a/unit_tests/sources/streams/test_call_rate.py +++ b/unit_tests/sources/streams/test_call_rate.py @@ -17,7 +17,6 @@ CallRateLimitHit, FixedWindowCallRatePolicy, HttpRequestMatcher, - HttpRequestRegexMatcher, MovingWindowCallRatePolicy, Rate, UnlimitedCallRatePolicy, @@ -358,90 +357,3 @@ def test_with_cache(self, mocker, requests_mock): assert next(records) == {"data": "some_data"} assert MovingWindowCallRatePolicy.try_acquire.call_count == 1 - - -class TestHttpRequestRegexMatcher: - """ - Tests for the new regex-based logic: - - Case-insensitive HTTP method matching - - Optional url_base (scheme://netloc) - - Regex-based path matching - - Query params (must be present) - - Headers (case-insensitive keys) - """ - - def test_case_insensitive_method(self): - matcher = HttpRequestRegexMatcher(method="GET") - - req_ok = Request("get", "https://example.com/test/path") - req_wrong = Request("POST", "https://example.com/test/path") - - assert matcher(req_ok) - assert not matcher(req_wrong) - - def test_url_base(self): - matcher = HttpRequestRegexMatcher(url_base="https://example.com") - - req_ok = Request("GET", 
"https://example.com/test/path?foo=bar") - req_wrong = Request("GET", "https://another.com/test/path?foo=bar") - - assert matcher(req_ok) - assert not matcher(req_wrong) - - def test_url_path_pattern(self): - matcher = HttpRequestRegexMatcher(url_path_pattern=r"/test/") - - req_ok = Request("GET", "https://example.com/test/something") - req_wrong = Request("GET", "https://example.com/other/something") - - assert matcher(req_ok) - assert not matcher(req_wrong) - - def test_query_params(self): - matcher = HttpRequestRegexMatcher(params={"foo": "bar"}) - - req_ok = Request("GET", "https://example.com/api?foo=bar&extra=123") - req_missing = Request("GET", "https://example.com/api?not_foo=bar") - - assert matcher(req_ok) - assert not matcher(req_missing) - - def test_headers_case_insensitive(self): - matcher = HttpRequestRegexMatcher(headers={"X-Custom-Header": "abc"}) - - req_ok = Request( - "GET", - "https://example.com/api?foo=bar", - headers={"x-custom-header": "abc", "other": "123"}, - ) - req_wrong = Request("GET", "https://example.com/api", headers={"x-custom-header": "wrong"}) - - assert matcher(req_ok) - assert not matcher(req_wrong) - - def test_combined_criteria(self): - matcher = HttpRequestRegexMatcher( - method="GET", - url_base="https://example.com", - url_path_pattern=r"/test/", - params={"foo": "bar"}, - headers={"X-Test": "123"}, - ) - - req_ok = Request("GET", "https://example.com/test/me?foo=bar", headers={"x-test": "123"}) - req_bad_base = Request( - "GET", "https://other.com/test/me?foo=bar", headers={"x-test": "123"} - ) - req_bad_path = Request("GET", "https://example.com/nope?foo=bar", headers={"x-test": "123"}) - req_bad_param = Request( - "GET", "https://example.com/test/me?extra=xyz", headers={"x-test": "123"} - ) - req_bad_header = Request( - "GET", "https://example.com/test/me?foo=bar", headers={"some-other-header": "xyz"} - ) - - assert matcher(req_ok) - assert not matcher(req_bad_base) - assert not matcher(req_bad_path) - assert not matcher(req_bad_param) - assert not matcher(req_bad_header) From c0bc64538acfbbcf06f7df8908a3ff248f061089 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Wed, 12 Feb 2025 17:44:45 +0200 Subject: [PATCH 12/26] Fix cursor value from record --- .../declarative/incremental/concurrent_partition_cursor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index fc75ecd90..4dc3a6341 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -363,8 +363,8 @@ def observe(self, record: Record) -> None: "Invalid state as stream slices that are emitted should refer to an existing cursor" ) - record_cursor = self._connector_state_converter.parse_value( - self._cursor_field.extract_value(record) + record_cursor = self._connector_state_converter.output_format( + self._connector_state_converter.parse_value(self._cursor_field.extract_value(record)) ) self._update_global_cursor(record_cursor) if not self._use_global_cursor: From 52b95e33d7782c163447a966754dc5156d7b555c Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Thu, 13 Feb 2025 12:02:37 +0200 Subject: [PATCH 13/26] Add throttling for state emitting in ConcurrentPerPartitionCursor --- .../concurrent_partition_cursor.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git 
a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index 4dc3a6341..2780218dc 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -5,6 +5,7 @@ import copy import logging import threading +import time from collections import OrderedDict from copy import deepcopy from datetime import timedelta @@ -59,7 +60,7 @@ class ConcurrentPerPartitionCursor(Cursor): """ DEFAULT_MAX_PARTITIONS_NUMBER = 25_000 - SWITCH_TO_GLOBAL_LIMIT = 1000 + SWITCH_TO_GLOBAL_LIMIT = 10_000 _NO_STATE: Mapping[str, Any] = {} _NO_CURSOR_STATE: Mapping[str, Any] = {} _GLOBAL_STATE_KEY = "state" @@ -103,6 +104,8 @@ def __init__( self._number_of_partitions: int = 0 self._use_global_cursor: bool = False self._partition_serializer = PerPartitionKeySerializer() + # Track the last time a state message was emitted + self._last_emission_time: float = 0.0 self._set_initial_state(stream_state) @@ -166,9 +169,12 @@ def ensure_at_least_one_state_emitted(self) -> None: self._global_cursor = self._new_global_cursor self._lookback_window = self._timer.finish() self._parent_state = self._partition_router.get_stream_state() - self._emit_state_message() + self._emit_state_message(throttle=False) - def _emit_state_message(self) -> None: + def _emit_state_message(self, throttle: bool = True) -> None: + current_time = time.time() + if throttle and current_time - self._last_emission_time <= 60: + return self._connector_state_manager.update_state_for_stream( self._stream_name, self._stream_namespace, @@ -178,6 +184,7 @@ def _emit_state_message(self) -> None: self._stream_name, self._stream_namespace ) self._message_repository.emit_message(state_message) + self._last_emission_time = current_time def stream_slices(self) -> Iterable[StreamSlice]: if self._timer.is_running(): @@ -242,7 +249,7 @@ def _ensure_partition_limit(self) -> None: partition_key ) # Remove the oldest partition logger.warning( - f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions}." + f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}." ) break else: @@ -251,7 +258,7 @@ def _ensure_partition_limit(self) -> None: 1 ] # Remove the oldest partition logger.warning( - f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions}." + f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}." 
) def _set_initial_state(self, stream_state: StreamState) -> None: @@ -372,7 +379,7 @@ def observe(self, record: Record) -> None: self._to_partition_key(record.associated_slice.partition) ].observe(record) - def _update_global_cursor(self, value: Mapping[str, Any]) -> None: + def _update_global_cursor(self, value: Any) -> None: if ( self._new_global_cursor is None or self._new_global_cursor[self.cursor_field.cursor_field_key] < value From 1166a7a2e68e0713fa191804a4cab844cfdf8c95 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 17 Feb 2025 13:41:05 +0200 Subject: [PATCH 14/26] Fix unit tests --- .../concurrent_partition_cursor.py | 19 ++++-- .../test_concurrent_perpartitioncursor.py | 66 ++++++++++++++++--- 2 files changed, 73 insertions(+), 12 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index 2780218dc..da12cc05d 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -171,10 +171,21 @@ def ensure_at_least_one_state_emitted(self) -> None: self._parent_state = self._partition_router.get_stream_state() self._emit_state_message(throttle=False) - def _emit_state_message(self, throttle: bool = True) -> None: + def _throttle_state_message(self) -> Optional[float]: + """ + Throttles the state message emission to once every 60 seconds. + """ current_time = time.time() - if throttle and current_time - self._last_emission_time <= 60: - return + if current_time - self._last_emission_time <= 60: + return None + return current_time + + def _emit_state_message(self, throttle: bool = True) -> None: + if throttle: + current_time = self._throttle_state_message() + if current_time is None: + return + self._last_emission_time = current_time self._connector_state_manager.update_state_for_stream( self._stream_name, self._stream_namespace, @@ -184,7 +195,6 @@ def _emit_state_message(self, throttle: bool = True) -> None: self._stream_name, self._stream_namespace ) self._message_repository.emit_message(state_message) - self._last_emission_time = current_time def stream_slices(self) -> Iterable[StreamSlice]: if self._timer.is_running(): @@ -358,6 +368,7 @@ def _set_global_state(self, stream_state: Mapping[str, Any]) -> None: self._new_global_cursor = deepcopy(fixed_global_state) def observe(self, record: Record) -> None: + # ToDo: check number of partitions if not self._use_global_cursor and self.limit_reached(): logger.info( f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. 
" diff --git a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py index ef06676f5..767d24874 100644 --- a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py +++ b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py @@ -3,6 +3,7 @@ from copy import deepcopy from datetime import datetime, timedelta from typing import Any, List, Mapping, MutableMapping, Optional, Union +from unittest.mock import MagicMock, patch from urllib.parse import unquote import pytest @@ -18,6 +19,7 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import ( ConcurrentDeclarativeSource, ) +from airbyte_cdk.sources.declarative.incremental import ConcurrentPerPartitionCursor from airbyte_cdk.test.catalog_builder import CatalogBuilder, ConfiguredAirbyteStreamBuilder from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read @@ -1181,14 +1183,18 @@ def test_incremental_parent_state( initial_state, expected_state, ): - run_incremental_parent_state_test( - manifest, - mock_requests, - expected_records, - num_intermediate_states, - initial_state, - [expected_state], - ) + # Patch `_throttle_state_message` so it always returns a float (indicating "no throttle") + with patch.object( + ConcurrentPerPartitionCursor, "_throttle_state_message", return_value=9999999.0 + ): + run_incremental_parent_state_test( + manifest, + mock_requests, + expected_records, + num_intermediate_states, + initial_state, + [expected_state], + ) STATE_MIGRATION_EXPECTED_STATE = { @@ -2967,3 +2973,47 @@ def test_incremental_substream_request_options_provider( expected_records, expected_state, ) + + +def test_state_throttling(mocker): + """ + Verifies that _emit_state_message does not emit a new state if less than 60s + have passed since last emission, and does emit once 60s or more have passed. 
+ """ + cursor = ConcurrentPerPartitionCursor( + cursor_factory=MagicMock(), + partition_router=MagicMock(), + stream_name="test_stream", + stream_namespace=None, + stream_state={}, + message_repository=MagicMock(), + connector_state_manager=MagicMock(), + connector_state_converter=MagicMock(), + cursor_field=MagicMock(), + ) + + mock_connector_manager = cursor._connector_state_manager + mock_repo = cursor._message_repository + + # Set the last emission time to "0" so we can control offset from that + cursor._last_emission_time = 0 + + mock_time = mocker.patch("time.time") + + # First attempt: only 10 seconds passed => NO emission + mock_time.return_value = 10 + cursor._emit_state_message() + mock_connector_manager.update_state_for_stream.assert_not_called() + mock_repo.emit_message.assert_not_called() + + # Second attempt: 30 seconds passed => still NO emission + mock_time.return_value = 30 + cursor._emit_state_message() + mock_connector_manager.update_state_for_stream.assert_not_called() + mock_repo.emit_message.assert_not_called() + + # Advance time: 70 seconds => exceed 60s => MUST emit + mock_time.return_value = 70 + cursor._emit_state_message() + mock_connector_manager.update_state_for_stream.assert_called_once() + mock_repo.emit_message.assert_called_once() From 4a7d9eccb4421c6591835a4ab99e9e4cc22a276c Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 17 Feb 2025 14:05:10 +0200 Subject: [PATCH 15/26] Move switching to global logic --- .../concurrent_partition_cursor.py | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index da12cc05d..74d7f8893 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -59,8 +59,8 @@ class ConcurrentPerPartitionCursor(Cursor): CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}. 
""" - DEFAULT_MAX_PARTITIONS_NUMBER = 25_000 - SWITCH_TO_GLOBAL_LIMIT = 10_000 + DEFAULT_MAX_PARTITIONS_NUMBER = 200 + SWITCH_TO_GLOBAL_LIMIT = 100 _NO_STATE: Mapping[str, Any] = {} _NO_CURSOR_STATE: Mapping[str, Any] = {} _GLOBAL_STATE_KEY = "state" @@ -145,19 +145,19 @@ def close_partition(self, partition: Partition) -> None: raise ValueError("stream_slice cannot be None") partition_key = self._to_partition_key(stream_slice.partition) - if not self._use_global_cursor: - self._cursor_per_partition[partition_key].close_partition(partition=partition) with self._lock: - self._semaphore_per_partition[partition_key].acquire() - cursor = self._cursor_per_partition[partition_key] - if ( - partition_key in self._finished_partitions - and self._semaphore_per_partition[partition_key]._value == 0 - ): - self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key]) if not self._use_global_cursor: + self._cursor_per_partition[partition_key].close_partition(partition=partition) + cursor = self._cursor_per_partition[partition_key] + if ( + partition_key in self._finished_partitions + and self._semaphore_per_partition[partition_key]._value == 0 + ): + self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key]) self._emit_state_message() + self._semaphore_per_partition[partition_key].acquire() + def ensure_at_least_one_state_emitted(self) -> None: """ The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be @@ -246,6 +246,13 @@ def _ensure_partition_limit(self) -> None: - Logs a warning each time a partition is removed, indicating whether it was finished or removed due to being the oldest. """ + if not self._use_global_cursor and self.limit_reached(): + logger.info( + f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. " + f"Switching to global cursor for {self._stream_name}." + ) + self._use_global_cursor = True + with self._lock: self._number_of_partitions += 1 while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1: @@ -368,14 +375,6 @@ def _set_global_state(self, stream_state: Mapping[str, Any]) -> None: self._new_global_cursor = deepcopy(fixed_global_state) def observe(self, record: Record) -> None: - # ToDo: check number of partitions - if not self._use_global_cursor and self.limit_reached(): - logger.info( - f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. " - f"Switching to global cursor for {self._stream_name}." - ) - self._use_global_cursor = True - if not record.associated_slice: raise ValueError( "Invalid state as stream slices that are emitted should refer to an existing cursor" From 19ad269f8f802f4701cc5b680eec282fd2183a99 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 17 Feb 2025 14:45:25 +0200 Subject: [PATCH 16/26] Revert test limits --- .../declarative/incremental/concurrent_partition_cursor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index 74d7f8893..ed67e8166 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -59,8 +59,8 @@ class ConcurrentPerPartitionCursor(Cursor): CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}. 
""" - DEFAULT_MAX_PARTITIONS_NUMBER = 200 - SWITCH_TO_GLOBAL_LIMIT = 100 + DEFAULT_MAX_PARTITIONS_NUMBER = 25_000 + SWITCH_TO_GLOBAL_LIMIT = 10_000 _NO_STATE: Mapping[str, Any] = {} _NO_CURSOR_STATE: Mapping[str, Any] = {} _GLOBAL_STATE_KEY = "state" From 6498528eab15f1bf261df588201a72922d895634 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 17 Feb 2025 15:36:26 +0200 Subject: [PATCH 17/26] Fix format --- .../declarative/incremental/concurrent_partition_cursor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index ed67e8166..84d3cb6e2 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -150,8 +150,8 @@ def close_partition(self, partition: Partition) -> None: self._cursor_per_partition[partition_key].close_partition(partition=partition) cursor = self._cursor_per_partition[partition_key] if ( - partition_key in self._finished_partitions - and self._semaphore_per_partition[partition_key]._value == 0 + partition_key in self._finished_partitions + and self._semaphore_per_partition[partition_key]._value == 0 ): self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key]) self._emit_state_message() From d3e7fe220eda8f3320c013a5b1cb7439b40db6f3 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 17 Feb 2025 18:03:38 +0200 Subject: [PATCH 18/26] Add parent state updates --- .../concurrent_partition_cursor.py | 49 ++++++++++++++++--- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index ed67e8166..aa3eef02a 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -95,6 +95,10 @@ def __init__( # the oldest partitions can be efficiently removed, maintaining the most recent partitions. self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict() self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict() + + # Parent-state tracking: store each partition’s parent state in creation order + self._partition_parent_state_map: OrderedDict[str, Mapping[str, Any]] = OrderedDict() + self._finished_partitions: set[str] = set() self._lock = threading.Lock() self._timer = Timer() @@ -154,10 +158,32 @@ def close_partition(self, partition: Partition) -> None: and self._semaphore_per_partition[partition_key]._value == 0 ): self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key]) - self._emit_state_message() + + self._check_and_update_parent_state() + + self._emit_state_message() self._semaphore_per_partition[partition_key].acquire() + def _check_and_update_parent_state(self) -> None: + """ + If all slices for the earliest partitions are closed, pop them from the left + of _partition_parent_state_map and update _parent_state to the most recent popped. 
+ """ + last_closed_state = None + # We iterate in creation order (left to right) in the OrderedDict + for p_key in list(self._partition_parent_state_map.keys()): + # If this partition is not fully closed, stop + if p_key not in self._finished_partitions or self._semaphore_per_partition[p_key]._value != 0: + break + # Otherwise, we pop from the left + _, closed_parent_state = self._partition_parent_state_map.popitem(last=False) + last_closed_state = closed_parent_state + + # If we popped at least one partition, update the parent_state to that partition's parent state + if last_closed_state is not None: + self._parent_state = last_closed_state + def ensure_at_least_one_state_emitted(self) -> None: """ The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be @@ -202,13 +228,17 @@ def stream_slices(self) -> Iterable[StreamSlice]: slices = self._partition_router.stream_slices() self._timer.start() - for partition in slices: - yield from self._generate_slices_from_partition(partition) + for partition, last, parent_state in iterate_with_last_flag_and_state( + slices, self._partition_router.get_stream_state + ): + yield from self._generate_slices_from_partition(partition, parent_state) - def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]: + def _generate_slices_from_partition(self, partition: StreamSlice, parent_state: Mapping[str, Any]) -> Iterable[StreamSlice]: # Ensure the maximum number of partitions is not exceeded self._ensure_partition_limit() + partition_key = self._to_partition_key(partition.partition) + cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition)) if not cursor: cursor = self._create_cursor( @@ -216,18 +246,21 @@ def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[St self._lookback_window if self._global_cursor else 0, ) with self._lock: - self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor - self._semaphore_per_partition[self._to_partition_key(partition.partition)] = ( + self._cursor_per_partition[partition_key] = cursor + self._semaphore_per_partition[partition_key] = ( threading.Semaphore(0) ) + with self._lock: + self._partition_parent_state_map[partition_key] = deepcopy(parent_state) + for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state( cursor.stream_slices(), lambda: None, ): - self._semaphore_per_partition[self._to_partition_key(partition.partition)].release() + self._semaphore_per_partition[partition_key].release() if is_last_slice: - self._finished_partitions.add(self._to_partition_key(partition.partition)) + self._finished_partitions.add(partition_key) yield StreamSlice( partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields ) From 7b4964edb72b800855114215d710387a529e232c Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 17 Feb 2025 18:14:19 +0200 Subject: [PATCH 19/26] Move acquiring the semaphore --- .../declarative/incremental/concurrent_partition_cursor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index 84d3cb6e2..efa5996b3 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -146,6 +146,7 @@ def close_partition(self, 
partition: Partition) -> None: partition_key = self._to_partition_key(stream_slice.partition) with self._lock: + self._semaphore_per_partition[partition_key].acquire() if not self._use_global_cursor: self._cursor_per_partition[partition_key].close_partition(partition=partition) cursor = self._cursor_per_partition[partition_key] @@ -156,8 +157,6 @@ def close_partition(self, partition: Partition) -> None: self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key]) self._emit_state_message() - self._semaphore_per_partition[partition_key].acquire() - def ensure_at_least_one_state_emitted(self) -> None: """ The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be From 203c1312ab97d1c2eec3a349f45edef69a307806 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Tue, 18 Feb 2025 13:39:05 +0200 Subject: [PATCH 20/26] Refactor to store only unique states --- .../concurrent_partition_cursor.py | 55 +++++++++++++------ .../test_concurrent_perpartitioncursor.py | 2 + 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index ab9c5258c..1ece3c579 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -166,20 +166,36 @@ def close_partition(self, partition: Partition) -> None: def _check_and_update_parent_state(self) -> None: """ - If all slices for the earliest partitions are closed, pop them from the left - of _partition_parent_state_map and update _parent_state to the most recent popped. + Pop the leftmost partition state from _partition_parent_state_map only if + *all partitions* up to (and including) that partition key in _semaphore_per_partition + are fully finished (i.e. in _finished_partitions and semaphore._value == 0). 
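+        The check is repeated until the first unfinished partition is reached; _parent_state is
+        then set to the parent state of the last entry that was popped, if any.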
""" last_closed_state = None - # We iterate in creation order (left to right) in the OrderedDict - for p_key in list(self._partition_parent_state_map.keys()): - # If this partition is not fully closed, stop - if p_key not in self._finished_partitions or self._semaphore_per_partition[p_key]._value != 0: + + while self._partition_parent_state_map: + # Look at the earliest partition key in creation order + earliest_key = next(iter(self._partition_parent_state_map)) + + # Verify ALL partitions from the left up to earliest_key are finished + all_left_finished = True + for p_key, sem in self._semaphore_per_partition.items(): + # If any earlier partition is still not finished, we must stop + if p_key not in self._finished_partitions or sem._value != 0: + all_left_finished = False + break + # Once we've reached earliest_key in the semaphore order, we can stop checking + if p_key == earliest_key: + break + + # If the partitions up to earliest_key are not all finished, break the while-loop + if not all_left_finished: break - # Otherwise, we pop from the left + + # Otherwise, pop the leftmost entry from parent-state map _, closed_parent_state = self._partition_parent_state_map.popitem(last=False) last_closed_state = closed_parent_state - # If we popped at least one partition, update the parent_state to that partition's parent state + # Update _parent_state if we actually popped at least one partition if last_closed_state is not None: self._parent_state = last_closed_state @@ -228,11 +244,13 @@ def stream_slices(self) -> Iterable[StreamSlice]: slices = self._partition_router.stream_slices() self._timer.start() for partition, last, parent_state in iterate_with_last_flag_and_state( - slices, self._partition_router.get_stream_state + slices, self._partition_router.get_stream_state ): yield from self._generate_slices_from_partition(partition, parent_state) - def _generate_slices_from_partition(self, partition: StreamSlice, parent_state: Mapping[str, Any]) -> Iterable[StreamSlice]: + def _generate_slices_from_partition( + self, partition: StreamSlice, parent_state: Mapping[str, Any] + ) -> Iterable[StreamSlice]: # Ensure the maximum number of partitions is not exceeded self._ensure_partition_limit() @@ -247,12 +265,17 @@ def _generate_slices_from_partition(self, partition: StreamSlice, parent_state: with self._lock: self._number_of_partitions += 1 self._cursor_per_partition[partition_key] = cursor - self._semaphore_per_partition[partition_key] = ( - threading.Semaphore(0) - ) + self._semaphore_per_partition[partition_key] = threading.Semaphore(0) with self._lock: - self._partition_parent_state_map[partition_key] = deepcopy(parent_state) + if ( + len(self._partition_parent_state_map) == 0 + or self._partition_parent_state_map[ + next(reversed(self._partition_parent_state_map)) + ] + != parent_state + ): + self._partition_parent_state_map[partition_key] = deepcopy(parent_state) for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state( cursor.stream_slices(), @@ -287,7 +310,6 @@ def _ensure_partition_limit(self) -> None: self._use_global_cursor = True with self._lock: - self._number_of_partitions += 1 while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1: # Try removing finished partitions first for partition_key in list(self._cursor_per_partition.keys()): @@ -372,9 +394,6 @@ def _set_initial_state(self, stream_state: StreamState) -> None: self._cursor_per_partition[self._to_partition_key(state["partition"])] = ( self._create_cursor(state["cursor"]) ) - 
self._semaphore_per_partition[self._to_partition_key(state["partition"])] = ( - threading.Semaphore(0) - ) # set default state for missing partitions if it is per partition with fallback to global if self._GLOBAL_STATE_KEY in stream_state: diff --git a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py index 767d24874..c40222291 100644 --- a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py +++ b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py @@ -2027,6 +2027,8 @@ def test_incremental_parent_state_no_records( "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, } ], + "state": {}, + "use_global_cursor": False, "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, } }, From 671fab452c10602b4a7ae5c99797ccfe173d3110 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Tue, 18 Feb 2025 18:43:06 +0200 Subject: [PATCH 21/26] Add intermediate states validation to unit tests --- .../test_concurrent_perpartitioncursor.py | 3521 +++++++++-------- 1 file changed, 1904 insertions(+), 1617 deletions(-) diff --git a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py index c40222291..23459366d 100644 --- a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py +++ b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py @@ -306,7 +306,7 @@ def run_mocked_test( - mock_requests, manifest, config, stream_name, initial_state, expected_records, expected_state + mock_requests, manifest, config, stream_name, initial_state, expected_records, expected_state ): """ Helper function to mock requests, run the test, and verify the results. @@ -356,15 +356,15 @@ def run_mocked_test( [req for req in m.request_history if unquote(req.url) == unquote(url)] ) assert ( - request_count == 1 + request_count == 1 ), f"URL {url} was called {request_count} times, expected exactly once." 
def _run_read( - manifest: Mapping[str, Any], - config: Mapping[str, Any], - stream_name: str, - state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None, + manifest: Mapping[str, Any], + config: Mapping[str, Any], + stream_name: str, + state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None, ) -> EntrypointOutput: source = ConcurrentDeclarativeSource( source_config=manifest, config=config, catalog=None, state=state @@ -416,8 +416,8 @@ def _run_read( INITIAL_STATE_PARTITION_11_CURSOR.replace("Z", "") ) LOOKBACK_DATE = ( - INITIAL_GLOBAL_CURSOR_DATE - timedelta(days=LOOKBACK_WINDOW_DAYS) -).isoformat() + "Z" + INITIAL_GLOBAL_CURSOR_DATE - timedelta(days=LOOKBACK_WINDOW_DAYS) + ).isoformat() + "Z" PARTITION_SYNC_START_TIME = "2024-01-02T00:00:00Z" @@ -426,316 +426,316 @@ def _run_read( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST_NO_DEPENDENCY, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}", - { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}&page=2", - }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100", - { - "comments": [ + "test_incremental_parent_state", + SUBSTREAM_MANIFEST_NO_DEPENDENCY, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}", { - "id": 9, - "post_id": 1, - "updated_at": COMMENT_9_OLDEST, # No requests for comment 9, filtered out due to the date + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}&page=2", }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", { - "id": 10, - "post_id": 1, - "updated_at": COMMENT_10_UPDATED_AT, + "comments": [ + { + "id": 9, + "post_id": 1, + "updated_at": COMMENT_9_OLDEST, # No requests for comment 9, filtered out due to the date + }, + { + "id": 10, + "post_id": 1, + "updated_at": COMMENT_10_UPDATED_AT, + }, + { + "id": 11, + "post_id": 1, + "updated_at": COMMENT_11_UPDATED_AT, + }, + ], + "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2", }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", { - "id": 11, - "post_id": 1, - "updated_at": COMMENT_11_UPDATED_AT, + "comments": [ + { + "id": 12, + "post_id": 1, + "updated_at": COMMENT_12_UPDATED_AT, + } + ] }, - ], - "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - { - 
"comments": [ + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", { - "id": 12, - "post_id": 1, - "updated_at": COMMENT_12_UPDATED_AT, - } - ] - }, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - { - "votes": [ + "votes": [ + { + "id": 100, + "comment_id": 10, + "created_at": VOTE_100_CREATED_AT, + } + ], + "next_page": f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", { - "id": 100, - "comment_id": 10, - "created_at": VOTE_100_CREATED_AT, - } - ], - "next_page": f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - { - "votes": [ + "votes": [ + { + "id": 101, + "comment_id": 10, + "created_at": VOTE_101_CREATED_AT, + } + ] + }, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", { - "id": 101, - "comment_id": 10, - "created_at": VOTE_101_CREATED_AT, - } - ] - }, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - { - "votes": [ + "votes": [ + { + "id": 111, + "comment_id": 11, + "created_at": VOTE_111_CREATED_AT, + } + ] + }, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", { - "id": 111, - "comment_id": 11, - "created_at": VOTE_111_CREATED_AT, - } - ] - }, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", - { - "comments": [ + "comments": [ + { + "id": 20, + "post_id": 2, + "updated_at": COMMENT_20_UPDATED_AT, + } + ], + "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2", { - "id": 20, - "post_id": 2, - "updated_at": COMMENT_20_UPDATED_AT, - } - ], - "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - { - "comments": [ + "comments": [ + { + "id": 21, + "post_id": 2, + "updated_at": 
COMMENT_21_UPDATED_AT, + } + ] + }, + ), + # Fetch the first page of votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={LOOKBACK_DATE}", { - "id": 21, - "post_id": 2, - "updated_at": COMMENT_21_UPDATED_AT, - } - ] - }, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={LOOKBACK_DATE}", - { - "votes": [ + "votes": [ + { + "id": 200, + "comment_id": 20, + "created_at": VOTE_200_CREATED_AT, + } + ] + }, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={LOOKBACK_DATE}", { - "id": 200, - "comment_id": 20, - "created_at": VOTE_200_CREATED_AT, - } - ] + "votes": [ + { + "id": 210, + "comment_id": 21, + "created_at": VOTE_210_CREATED_AT, + } + ] + }, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", + { + "comments": [ + { + "id": 30, + "post_id": 3, + "updated_at": COMMENT_30_UPDATED_AT, + } + ] + }, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={LOOKBACK_DATE}", + { + "votes": [ + { + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] + }, + ), + ], + # Expected records + [ + { + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, }, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={LOOKBACK_DATE}", { - "votes": [ - { - "id": 210, - "comment_id": 21, - "created_at": VOTE_210_CREATED_AT, - } - ] + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, }, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", { - "comments": [ - { - "id": 30, - "post_id": 3, - "updated_at": COMMENT_30_UPDATED_AT, - } - ] + "comment_id": 11, + "comment_updated_at": COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, + "id": 111, }, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={LOOKBACK_DATE}", { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] + "comment_id": 20, + "comment_updated_at": COMMENT_20_UPDATED_AT, + "created_at": VOTE_200_CREATED_AT, + "id": 200, }, - ), - ], - # Expected records - [ - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, - }, - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, - }, - { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, - }, - { - "comment_id": 20, - "comment_updated_at": COMMENT_20_UPDATED_AT, - "created_at": VOTE_200_CREATED_AT, - "id": 200, - }, - { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, - }, - { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), - "id": 300, - }, 
- ], - # Initial state - { - # This should not happen since parent state is disabled, but I've added this to validate that and - # incoming parent_state is ignored when the parent stream's incremental_dependency is disabled - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "states": [ { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR_TIMESTAMP}, + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, }, { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), + "id": 300, }, ], - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR_TIMESTAMP}, - "lookback_window": 86400, - }, - # Expected state - { - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": VOTE_100_CREATED_AT}, + # Initial state + { + # This should not happen since parent state is disabled, but I've added this to validate that and + # incoming parent_state is ignored when the parent stream's incremental_dependency is disabled + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR_TIMESTAMP}, }, - "cursor": {"created_at": VOTE_111_CREATED_AT}, - }, - { - "partition": { - "id": 12, - "parent_slice": {"id": 1, "parent_slice": {}}, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, }, - "cursor": {"created_at": LOOKBACK_DATE}, - }, - { - "partition": { - "id": 20, - "parent_slice": {"id": 2, "parent_slice": {}}, + ], + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR_TIMESTAMP}, + "lookback_window": 86400, + }, + # Expected state + { + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": VOTE_100_CREATED_AT}, }, - "cursor": {"created_at": VOTE_200_CREATED_AT}, - }, - { - "partition": { - "id": 21, - "parent_slice": {"id": 2, "parent_slice": {}}, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": VOTE_111_CREATED_AT}, }, - "cursor": {"created_at": VOTE_210_CREATED_AT}, - }, - { - "partition": { - "id": 30, - "parent_slice": {"id": 3, "parent_slice": {}}, + { + "partition": { + "id": 12, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": LOOKBACK_DATE}, }, - "cursor": {"created_at": VOTE_300_CREATED_AT}, - }, - ], - "use_global_cursor": False, - "lookback_window": 1, - "parent_state": {}, - "state": {"created_at": VOTE_100_CREATED_AT}, - }, + { + "partition": { + "id": 20, + "parent_slice": {"id": 
2, "parent_slice": {}}, + }, + "cursor": {"created_at": VOTE_200_CREATED_AT}, + }, + { + "partition": { + "id": 21, + "parent_slice": {"id": 2, "parent_slice": {}}, + }, + "cursor": {"created_at": VOTE_210_CREATED_AT}, + }, + { + "partition": { + "id": 30, + "parent_slice": {"id": 3, "parent_slice": {}}, + }, + "cursor": {"created_at": VOTE_300_CREATED_AT}, + }, + ], + "use_global_cursor": False, + "lookback_window": 1, + "parent_state": {}, + "state": {"created_at": VOTE_100_CREATED_AT}, + }, ), ], ) def test_incremental_parent_state_no_incremental_dependency( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ This is a pretty complicated test that syncs a low-code connector stream with three levels of substreams @@ -761,12 +761,13 @@ def test_incremental_parent_state_no_incremental_dependency( def run_incremental_parent_state_test( - manifest, - mock_requests, - expected_records, - num_intermediate_states, - initial_state, - expected_states, + manifest, + mock_requests, + expected_records, + num_intermediate_states, + intermidiate_states, + initial_state, + expected_states, ): """ Run an incremental parent state test for the specified stream. @@ -784,6 +785,7 @@ def run_incremental_parent_state_test( mock_requests (list): A list of tuples containing URL and response data for mocking API requests. expected_records (list): The expected records to compare against the output. num_intermediate_states (int): The number of intermediate states to expect. + intermidiate_states (list): A list of intermediate states to assert initial_state (list): The initial state to start the read operation. expected_states (list): A list of expected final states after the read operation. """ @@ -830,6 +832,12 @@ def run_incremental_parent_state_test( # Assert that the number of intermediate states is as expected assert len(intermediate_states) - 1 == num_intermediate_states + # Extract just the Python dict from each state message + all_state_dicts = [st[0].stream.stream_state.__dict__ for st in intermediate_states] + + for idx, itermidiate_state in enumerate(all_state_dicts): + assert itermidiate_state == intermidiate_states[idx], idx + # For each intermediate state, perform another read starting from that state for state, records_before_state in intermediate_states[:-1]: output_intermediate = _run_read(manifest, CONFIG, STREAM_NAME, [state]) @@ -848,8 +856,8 @@ def run_incremental_parent_state_test( {orjson.dumps(record): record for record in expected_records}.values() ) assert ( - sorted(cumulative_records_state_deduped, key=lambda x: x["id"]) - == sorted(expected_records_set, key=lambda x: x["id"]) + sorted(cumulative_records_state_deduped, key=lambda x: x["id"]) + == sorted(expected_records_set, key=lambda x: x["id"]) ), f"Records mismatch with intermediate state {state}. Expected {expected_records}, got {cumulative_records_state_deduped}" # Store the final state after each intermediate read @@ -862,336 +870,615 @@ def run_incremental_parent_state_test( # Assert that the final state matches the expected state for all runs for i, final_state in enumerate(final_states): assert ( - final_state in expected_states + final_state in expected_states ), f"Final state mismatch at run {i + 1}. 
Expected {expected_states}, got {final_state}" -@pytest.mark.parametrize( - "test_name, manifest, mock_requests, expected_records, num_intermediate_states, initial_state, expected_state", - [ - ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", - { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( - f"https://api.example.com/community/posts" - f"?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" - ), - }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100", - { - "comments": [ - { - "id": 9, - "post_id": 1, - "updated_at": COMMENT_9_OLDEST, - }, - { - "id": 10, - "post_id": 1, - "updated_at": COMMENT_10_UPDATED_AT, - }, - { - "id": 11, - "post_id": 1, - "updated_at": COMMENT_11_UPDATED_AT, - }, - ], - "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", +INITIAL_STATE = { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } + }, + "state": {"created_at": INITIAL_GLOBAL_CURSOR}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "lookback_window": 86400, +} + +INTERMEDIATE_STATES = [ + { + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-15T00:00:00Z"}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-03T00:00:02Z"}, + }, + ], + "state": {"created_at": "2024-01-03T00:00:02Z"}, + "lookback_window": 86400, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {}, + "states": [ { - "votes": [ - { - "id": 100, - "comment_id": 10, - "created_at": VOTE_100_CREATED_AT, - } - ], - "next_page": ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" - ), - }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - 
f"https://api.example.com/community/posts/1/comments/11/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": "2023-01-04T00:00:00Z"}, + } + ], + "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, + } + }, + }, + { + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-15T00:00:00Z"}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-13T00:00:00Z"}, + }, + ], + "state": {"created_at": "2024-01-03T00:00:02Z"}, + "lookback_window": 86400, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {}, + "states": [ { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={LOOKBACK_DATE}", + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": "2023-01-04T00:00:00Z"}, + } + ], + "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, + } + }, + }, + { + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-15T00:00:00Z"}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-13T00:00:00Z"}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-02T00:00:02Z"}, + }, + ], + "state": {"created_at": "2024-01-03T00:00:02Z"}, + "lookback_window": 86400, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {}, + "states": [ { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] - }, - ), - # Requests with intermediate states - # Fetch votes for comment 10 of post 1 - ( - 
f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={VOTE_100_CREATED_AT}", + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": "2023-01-04T00:00:00Z"}, + } + ], + "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, + } + }, + }, + { + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-15T00:00:00Z"}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-13T00:00:00Z"}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-02T00:00:02Z"}, + }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-12T00:00:00Z"}, + }, + ], + "state": {"created_at": "2024-01-03T00:00:02Z"}, + "lookback_window": 86400, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {}, + "states": [ { - "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], - }, - ), - # Fetch votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes?per_page=100&start_time={VOTE_111_CREATED_AT}", + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": "2024-01-25T00:00:00Z"}, + } + ], + "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, + } + }, + }, + { + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-15T00:00:00Z"}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-13T00:00:00Z"}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-02T00:00:02Z"}, + }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-12T00:00:00Z"}, + }, + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-12T00:00:15Z"}, + }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-02T00:00:02Z"}, + }, + ], + "state": {"created_at": "2024-01-03T00:00:02Z"}, + "lookback_window": 86400, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {}, + "states": [ { - "votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}], - }, - ), - # Fetch votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={VOTE_111_CREATED_AT}", + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": "2024-01-25T00:00:00Z"}, + } + ], + "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, + } + }, + }, + { + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-15T00:00:00Z"}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-13T00:00:00Z"}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-02T00:00:02Z"}, + }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": 
"2024-01-12T00:00:00Z"}, + }, + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-12T00:00:15Z"}, + }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-10T00:00:00Z"}, + }, + ], + "state": {"created_at": "2024-01-03T00:00:02Z"}, + "lookback_window": 86400, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {}, + "states": [ { - "votes": [], + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": "2024-01-25T00:00:00Z"}, }, - ), - # Fetch votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={VOTE_200_CREATED_AT}", - {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, - ), - # Fetch votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={VOTE_210_CREATED_AT}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={VOTE_300_CREATED_AT}", { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] + "partition": {"id": 2, "parent_slice": {}}, + "cursor": {"updated_at": "2024-01-22T00:00:00Z"}, }, - ), - ], - # Expected records - [ - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, - }, - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, - }, - { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, - }, - { - "comment_id": 20, - "comment_updated_at": COMMENT_20_UPDATED_AT, - "created_at": VOTE_200_CREATED_AT, - "id": 200, - }, - { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, - }, - { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), - "id": 300, - }, - ], - # Number of intermediate states - 6 as number of parent partitions - 6, - # Initial state + ], + "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, + } + }, + }, + { + "use_global_cursor": False, + "states": [ { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "state": {"created_at": INITIAL_GLOBAL_CURSOR}, + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-15T00:00:00Z"}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-13T00:00:00Z"}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-02T00:00:02Z"}, + }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-12T00:00:00Z"}, + }, + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-12T00:00:15Z"}, + }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + 
"cursor": {"created_at": "2024-01-10T00:00:00Z"}, + }, + ], + "state": {"created_at": "2024-01-15T00:00:00Z"}, + "lookback_window": 1, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {"updated_at": "2024-01-25T00:00:00Z"}, + "lookback_window": 1, "states": [ { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": "2024-01-25T00:00:00Z"}, }, { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "partition": {"id": 2, "parent_slice": {}}, + "cursor": {"updated_at": "2024-01-22T00:00:00Z"}, + }, + { + "partition": {"id": 3, "parent_slice": {}}, + "cursor": {"updated_at": "2024-01-09T00:00:00Z"}, }, ], - "lookback_window": 86400, - }, - # Expected state - { - "state": {"created_at": VOTE_100_CREATED_AT}, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {"updated_at": COMMENT_10_UPDATED_AT}, # 10 is the "latest" - "parent_state": { - "posts": {"updated_at": POST_1_UPDATED_AT} - }, # post 1 is the latest - "lookback_window": 1, - "states": [ + "parent_state": {"posts": {"updated_at": "2024-01-30T00:00:00Z"}}, + } + }, + }, +] + + +@pytest.mark.parametrize( + "test_name, manifest, mock_requests, expected_records, num_intermediate_states, intermidiate_states, initial_state, expected_state", + [ + ( + "test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + { + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( + f"https://api.example.com/community/posts" + f"?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" + ), + }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", + { + "comments": [ + { + "id": 9, + "post_id": 1, + "updated_at": COMMENT_9_OLDEST, + }, + { + "id": 10, + "post_id": 1, + "updated_at": COMMENT_10_UPDATED_AT, + }, + { + "id": 11, + "post_id": 1, + "updated_at": COMMENT_11_UPDATED_AT, + }, + ], + "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, + "votes": [ + { + "id": 100, + "comment_id": 10, + "created_at": VOTE_100_CREATED_AT, + } + ], + "next_page": ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" + ), }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + 
f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", { - "partition": {"id": 2, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2", }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={LOOKBACK_DATE}", { - "partition": {"id": 3, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, + "votes": [ + { + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] }, - ], - } - }, - "lookback_window": 1, - "use_global_cursor": False, - "states": [ + ), + # Requests with intermediate states + # Fetch votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={VOTE_100_CREATED_AT}", + { + "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], + }, + ), + # Fetch votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes?per_page=100&start_time={VOTE_111_CREATED_AT}", + { + "votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}], + }, + ), + # Fetch votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={VOTE_111_CREATED_AT}", + { + "votes": [], + }, + ), + # Fetch votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={VOTE_200_CREATED_AT}", + {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, + ), + # Fetch votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={VOTE_210_CREATED_AT}", + 
{"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={VOTE_300_CREATED_AT}", + { + "votes": [ + { + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] + }, + ), + ], + # Expected records + [ { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_100_CREATED_AT}, + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, }, { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_111_CREATED_AT}, + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, }, { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": LOOKBACK_DATE}, + "comment_id": 11, + "comment_updated_at": COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, + "id": 111, }, { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_200_CREATED_AT}, + "comment_id": 20, + "comment_updated_at": COMMENT_20_UPDATED_AT, + "created_at": VOTE_200_CREATED_AT, + "id": 200, }, { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_210_CREATED_AT}, + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, }, { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_300_CREATED_AT}, + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), + "id": 300, }, ], - }, + # Number of intermediate states - 6 as number of parent partitions + 6, + # Intermediate states + INTERMEDIATE_STATES, + # Initial state + INITIAL_STATE, + # Expected state + { + "state": {"created_at": VOTE_100_CREATED_AT}, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {"updated_at": COMMENT_10_UPDATED_AT}, # 10 is the "latest" + "parent_state": { + "posts": {"updated_at": POST_1_UPDATED_AT} + }, # post 1 is the latest + "lookback_window": 1, + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, + }, + { + "partition": {"id": 2, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, + }, + { + "partition": {"id": 3, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, + }, + ], + } + }, + "lookback_window": 1, + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_100_CREATED_AT}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_111_CREATED_AT}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": LOOKBACK_DATE}, + }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_200_CREATED_AT}, + }, + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_210_CREATED_AT}, + }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": 
VOTE_300_CREATED_AT}, + }, + ], + }, ), ], ) def test_incremental_parent_state( - test_name, - manifest, - mock_requests, - expected_records, - num_intermediate_states, - initial_state, - expected_state, + test_name, + manifest, + mock_requests, + expected_records, + num_intermediate_states, + intermidiate_states, + initial_state, + expected_state, ): # Patch `_throttle_state_message` so it always returns a float (indicating "no throttle") with patch.object( - ConcurrentPerPartitionCursor, "_throttle_state_message", return_value=9999999.0 + ConcurrentPerPartitionCursor, "_throttle_state_message", return_value=9999999.0 ): run_incremental_parent_state_test( manifest, mock_requests, expected_records, num_intermediate_states, + intermidiate_states, initial_state, [expected_state], ) @@ -1259,166 +1546,166 @@ def test_incremental_parent_state( "test_name, manifest, mock_requests, expected_records", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( + "test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + { + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( + f"https://api.example.com/community/posts?per_page=100" + f"&start_time={PARTITION_SYNC_START_TIME}&page=2" + ), + }, + ), + # Fetch the second page of posts + ( f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARTITION_SYNC_START_TIME}&page=2" - ), + f"&start_time={PARTITION_SYNC_START_TIME}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", + { + "comments": [ + {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, + {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, + {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, + ], + "next_page": ( + "https://api.example.com/community/posts/1/comments" + "?per_page=100&page=2" + ), + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + { + "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], + "next_page": ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={PARTITION_SYNC_START_TIME}" + ), + }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={PARTITION_SYNC_START_TIME}", + {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + {"votes": [{"id": 111, 
"comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", + { + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": ( + "https://api.example.com/community/posts/2/comments" + "?per_page=100&page=2" + ), + }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + { + "votes": [ + { + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] + }, + ), + ], + # Expected records + [ + { + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARTITION_SYNC_START_TIME}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100", { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, - {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, - {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, - ], - "next_page": ( - "https://api.example.com/community/posts/1/comments" - "?per_page=100&page=2" - ), + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", { - "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], - "next_page": ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={PARTITION_SYNC_START_TIME}" - ), + "comment_id": 11, + "comment_updated_at": COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, + "id": 111, }, - ), - 
# Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={PARTITION_SYNC_START_TIME}", - {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": ( - "https://api.example.com/community/posts/2/comments" - "?per_page=100&page=2" - ), + "comment_id": 20, + "comment_updated_at": COMMENT_20_UPDATED_AT, + "created_at": VOTE_200_CREATED_AT, + "id": 200, }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, }, - ), - ], - # Expected records - [ - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, - }, - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, - }, - { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, - }, - { - "comment_id": 20, - "comment_updated_at": COMMENT_20_UPDATED_AT, - "created_at": VOTE_200_CREATED_AT, - "id": 200, - }, - { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, - }, - { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), - "id": 300, - }, - ], + { + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), + "id": 300, + }, + ], ), ], ) @@ -1427,40 
+1714,40 @@ def test_incremental_parent_state( [ ({"created_at": PARTITION_SYNC_START_TIME}, STATE_MIGRATION_EXPECTED_STATE), ( - { - "state": {"created_at": PARTITION_SYNC_START_TIME}, - "lookback_window": 0, - "use_global_cursor": False, - "parent_state": { - "post_comments": { - "state": {"updated_at": PARTITION_SYNC_START_TIME}, - "parent_state": {"posts": {"updated_at": PARTITION_SYNC_START_TIME}}, - "lookback_window": 0, - } + { + "state": {"created_at": PARTITION_SYNC_START_TIME}, + "lookback_window": 0, + "use_global_cursor": False, + "parent_state": { + "post_comments": { + "state": {"updated_at": PARTITION_SYNC_START_TIME}, + "parent_state": {"posts": {"updated_at": PARTITION_SYNC_START_TIME}}, + "lookback_window": 0, + } + }, }, - }, - STATE_MIGRATION_EXPECTED_STATE, + STATE_MIGRATION_EXPECTED_STATE, ), ( - { - "state": {"created_at": PARTITION_SYNC_START_TIME}, - "lookback_window": 0, - "use_global_cursor": True, - "parent_state": { - "post_comments": { - "state": {"updated_at": PARTITION_SYNC_START_TIME}, - "parent_state": {"posts": {"updated_at": PARTITION_SYNC_START_TIME}}, - "lookback_window": 0, - } + { + "state": {"created_at": PARTITION_SYNC_START_TIME}, + "lookback_window": 0, + "use_global_cursor": True, + "parent_state": { + "post_comments": { + "state": {"updated_at": PARTITION_SYNC_START_TIME}, + "parent_state": {"posts": {"updated_at": PARTITION_SYNC_START_TIME}}, + "lookback_window": 0, + } + }, }, - }, - STATE_MIGRATION_GLOBAL_EXPECTED_STATE, + STATE_MIGRATION_GLOBAL_EXPECTED_STATE, ), ( - { - "state": {"created_at": PARTITION_SYNC_START_TIME}, - }, - STATE_MIGRATION_EXPECTED_STATE, + { + "state": {"created_at": PARTITION_SYNC_START_TIME}, + }, + STATE_MIGRATION_EXPECTED_STATE, ), ], ids=[ @@ -1471,7 +1758,7 @@ def test_incremental_parent_state( ], ) def test_incremental_parent_state_migration( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test incremental partition router with parent state migration @@ -1491,101 +1778,101 @@ def test_incremental_parent_state_migration( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", - { - "posts": [], - "next_page": ( + "test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + { + "posts": [], + "next_page": ( + f"https://api.example.com/community/posts?per_page=100" + f"&start_time={PARENT_POSTS_CURSOR}&page=2" + ), + }, + ), + # Fetch the second page of posts + ( f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARENT_POSTS_CURSOR}&page=2" - ), + f"&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": []}, + ), + ], + # Expected records (empty) + [], + # Initial state + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": []}, - ), - ], - # Expected records 
(empty) - [], - # Initial state - { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, - }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], - "state": {"created_at": INITIAL_GLOBAL_CURSOR}, - "lookback_window": 1, - }, - # Expected state - { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "state": {}, - "use_global_cursor": False, - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } + ], + "state": {"created_at": INITIAL_GLOBAL_CURSOR}, + "lookback_window": 1, }, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + # Expected state + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "state": {}, + "use_global_cursor": False, + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], - "state": {"created_at": INITIAL_GLOBAL_CURSOR}, - "lookback_window": 1, - "use_global_cursor": False, - }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "state": {"created_at": INITIAL_GLOBAL_CURSOR}, + "lookback_window": 1, + "use_global_cursor": False, + }, ), ], ) def test_incremental_parent_state_no_slices( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test incremental partition router with no parent records @@ -1605,217 +1892,217 @@ def test_incremental_parent_state_no_slices( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", - { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( + "test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + { + 
"posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( + f"https://api.example.com/community/posts?per_page=100" + f"&start_time={PARENT_POSTS_CURSOR}&page=2" + ), + }, + ), + # Fetch the second page of posts + ( f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARENT_POSTS_CURSOR}&page=2" - ), - }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100", - { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, - {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, - {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, - ], - "next_page": ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2" - ), - }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - { - "votes": [], - "next_page": ( + f"&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", + { + "comments": [ + {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, + {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, + {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, + ], + "next_page": ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2" + ), + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" - ), - }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", - { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2" - ), - }, - ), - # Fetch the second page of comments for post 2 - ( - 
"https://api.example.com/community/posts/2/comments?per_page=100&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - ], - # Expected records - [], - # Initial state - { - "parent_state": { - "post_comments": { - "states": [ + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, - }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, + "votes": [], + "next_page": ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" + ), + }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", + { + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2" + ), + }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of comments 
for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), ], - "use_global_cursor": False, - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - "lookback_window": 0, - }, - # Expected state - { - "lookback_window": 1, - "use_global_cursor": False, - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + # Expected records + [], + # Initial state + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {"updated_at": COMMENT_10_UPDATED_AT}, - "parent_state": {"posts": {"updated_at": POST_1_UPDATED_AT}}, - "lookback_window": 1, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": {"id": 2, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "use_global_cursor": False, + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "lookback_window": 0, + }, + # Expected state + { + "lookback_window": 1, + "use_global_cursor": False, + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": {"id": 3, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - ], - } + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": 
INITIAL_STATE_PARTITION_11_CURSOR}, + }, + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {"updated_at": COMMENT_10_UPDATED_AT}, + "parent_state": {"posts": {"updated_at": POST_1_UPDATED_AT}}, + "lookback_window": 1, + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, + }, + { + "partition": {"id": 2, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, + }, + { + "partition": {"id": 3, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, + }, + ], + } + }, }, - }, ), ], ) def test_incremental_parent_state_no_records( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test incremental partition router with no child records @@ -1835,238 +2122,238 @@ def test_incremental_parent_state_no_records( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + "test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + { + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" + ), + }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", + { + "comments": [ + {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, + {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, + {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, + ], + "next_page": ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2" + ), + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + { + "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], + "next_page": ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" + ), + }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + {"votes": [{"id": 101, "comment_id": 10, "created_at": 
VOTE_101_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", + { + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2" + ), + }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 of post 2 - 404 error + ( + f"https://api.example.com/community/posts/2/comments/20/votes" + f"?per_page=100&start_time={LOOKBACK_DATE}", + None, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes" + f"?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes" + f"?per_page=100&start_time={LOOKBACK_DATE}", + { + "votes": [ + { + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] + }, + ), + ], + # Expected records + [ { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" - ), + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100", { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, - {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, - {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, - ], - "next_page": ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2" - ), + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", { - 
"votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], - "next_page": ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" - ), + "comment_id": 11, + "comment_updated_at": COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, + "id": 111, }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2" - ), + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - 404 error - ( - f"https://api.example.com/community/posts/2/comments/20/votes" - f"?per_page=100&start_time={LOOKBACK_DATE}", - None, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes" - f"?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes" - f"?per_page=100&start_time={LOOKBACK_DATE}", { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), + "id": 300, }, - ), - ], - # Expected records - [ - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, - }, - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, - }, - { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, - }, + ], + # Initial state { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": 
PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } + }, + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "lookback_window": 86400, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], }, + # Expected state { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), - "id": 300, - }, - ], - # Initial state - { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - "lookback_window": 86400, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + # The global state, lookback window and the parent state are the same because sync failed for comment 20 + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "state": {}, + "use_global_cursor": False, + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "lookback_window": 86400, + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_100_CREATED_AT}, }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], - }, - # Expected state - { - # The global state, lookback window and the parent state are the same because sync failed for comment 20 - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "state": {}, - "use_global_cursor": False, - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_111_CREATED_AT}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": LOOKBACK_DATE}, + }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": LOOKBACK_DATE}, + }, + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_210_CREATED_AT}, + }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_300_CREATED_AT}, + }, + ], }, - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - "lookback_window": 86400, - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_100_CREATED_AT}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_111_CREATED_AT}, - }, - { - "partition": 
{"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": LOOKBACK_DATE}, - }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": LOOKBACK_DATE}, - }, - { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_210_CREATED_AT}, - }, - { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_300_CREATED_AT}, - }, - ], - }, ), ], ) def test_incremental_substream_error( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): run_mocked_test( mock_requests, @@ -2237,85 +2524,85 @@ def test_incremental_substream_error( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - LISTPARTITION_MANIFEST, - [ - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&start_time=2024-01-24T00:00:00Z", - { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": "2023-01-01T00:00:00Z"}, - {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, - {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, - ], - "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-24T00:00:00Z", - }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-24T00:00:00Z", - {"comments": [{"id": 12, "post_id": 1, "updated_at": "2024-01-23T00:00:00Z"}]}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&start_time=2024-01-21T05:00:00Z", - { - "comments": [ - {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"} - ], - "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", - {"comments": [{"id": 21, "post_id": 2, "updated_at": "2024-01-21T00:00:00Z"}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100&start_time=2024-01-08T00:00:00Z", - {"comments": [{"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}]}, - ), - ], - # Expected records - [ - {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, - {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, - {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"}, - {"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}, - ], - # Initial state - { - "state": {"updated_at": "2024-01-08T00:00:00Z"}, - "states": [ - { - "cursor": {"updated_at": "2024-01-24T00:00:00Z"}, - "partition": {"id": "1"}, - }, - { - "cursor": {"updated_at": "2024-01-21T05:00:00Z"}, - "partition": {"id": "2"}, - }, + "test_incremental_parent_state", + LISTPARTITION_MANIFEST, + [ + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&start_time=2024-01-24T00:00:00Z", + { + "comments": [ + {"id": 9, "post_id": 1, "updated_at": "2023-01-01T00:00:00Z"}, + {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, + {"id": 11, "post_id": 1, 
"updated_at": "2024-01-24T00:00:00Z"}, + ], + "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-24T00:00:00Z", + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-24T00:00:00Z", + {"comments": [{"id": 12, "post_id": 1, "updated_at": "2024-01-23T00:00:00Z"}]}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&start_time=2024-01-21T05:00:00Z", + { + "comments": [ + {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"} + ], + "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", + }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", + {"comments": [{"id": 21, "post_id": 2, "updated_at": "2024-01-21T00:00:00Z"}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100&start_time=2024-01-08T00:00:00Z", + {"comments": [{"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}]}, + ), ], - "use_global_cursor": False, - }, - # Expected state - { - "use_global_cursor": False, - "lookback_window": 1, - "state": {"updated_at": "2024-01-25T00:00:00Z"}, - "states": [ - {"cursor": {"updated_at": "2024-01-25T00:00:00Z"}, "partition": {"id": "1"}}, - {"cursor": {"updated_at": "2024-01-22T00:00:00Z"}, "partition": {"id": "2"}}, - {"cursor": {"updated_at": "2024-01-09T00:00:00Z"}, "partition": {"id": "3"}}, + # Expected records + [ + {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, + {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, + {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"}, + {"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}, ], - }, + # Initial state + { + "state": {"updated_at": "2024-01-08T00:00:00Z"}, + "states": [ + { + "cursor": {"updated_at": "2024-01-24T00:00:00Z"}, + "partition": {"id": "1"}, + }, + { + "cursor": {"updated_at": "2024-01-21T05:00:00Z"}, + "partition": {"id": "2"}, + }, + ], + "use_global_cursor": False, + }, + # Expected state + { + "use_global_cursor": False, + "lookback_window": 1, + "state": {"updated_at": "2024-01-25T00:00:00Z"}, + "states": [ + {"cursor": {"updated_at": "2024-01-25T00:00:00Z"}, "partition": {"id": "1"}}, + {"cursor": {"updated_at": "2024-01-22T00:00:00Z"}, "partition": {"id": "2"}}, + {"cursor": {"updated_at": "2024-01-09T00:00:00Z"}, "partition": {"id": "3"}}, + ], + }, ), ], ) def test_incremental_list_partition_router( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test ConcurrentPerPartitionCursor with ListPartitionRouter @@ -2335,85 +2622,85 @@ def test_incremental_list_partition_router( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_error_handling", - LISTPARTITION_MANIFEST, - [ - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&start_time=2024-01-20T00:00:00Z", - { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": "2023-01-01T00:00:00Z"}, - {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, - {"id": 11, "post_id": 1, 
"updated_at": "2024-01-24T00:00:00Z"}, - ], - "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-20T00:00:00Z", - }, - ), - # Error response for the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-20T00:00:00Z", - None, # Simulate a network error or an empty response - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&start_time=2024-01-21T05:00:00Z", - { - "comments": [ - {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"} - ], - "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", - {"comments": [{"id": 21, "post_id": 2, "updated_at": "2024-01-21T00:00:00Z"}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100&start_time=2024-01-08T00:00:00Z", - {"comments": [{"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}]}, - ), - ], - # Expected records - [ - {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, - {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, - {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"}, - {"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}, - ], - # Initial state - { - "state": {"updated_at": "2024-01-08T00:00:00Z"}, - "states": [ - { - "cursor": {"updated_at": "2024-01-20T00:00:00Z"}, - "partition": {"id": "1"}, - }, - { - "cursor": {"updated_at": "2024-01-21T05:00:00Z"}, - "partition": {"id": "2"}, - }, + "test_incremental_error_handling", + LISTPARTITION_MANIFEST, + [ + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&start_time=2024-01-20T00:00:00Z", + { + "comments": [ + {"id": 9, "post_id": 1, "updated_at": "2023-01-01T00:00:00Z"}, + {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, + {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, + ], + "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-20T00:00:00Z", + }, + ), + # Error response for the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-20T00:00:00Z", + None, # Simulate a network error or an empty response + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&start_time=2024-01-21T05:00:00Z", + { + "comments": [ + {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"} + ], + "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", + }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", + {"comments": [{"id": 21, "post_id": 2, "updated_at": "2024-01-21T00:00:00Z"}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100&start_time=2024-01-08T00:00:00Z", + {"comments": [{"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}]}, + ), ], - "use_global_cursor": False, - }, - # 
Expected state - { - "lookback_window": 0, - "use_global_cursor": False, - "state": {"updated_at": "2024-01-08T00:00:00Z"}, - "states": [ - {"cursor": {"updated_at": "2024-01-20T00:00:00Z"}, "partition": {"id": "1"}}, - {"cursor": {"updated_at": "2024-01-22T00:00:00Z"}, "partition": {"id": "2"}}, - {"cursor": {"updated_at": "2024-01-09T00:00:00Z"}, "partition": {"id": "3"}}, + # Expected records + [ + {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, + {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, + {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"}, + {"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}, ], - }, + # Initial state + { + "state": {"updated_at": "2024-01-08T00:00:00Z"}, + "states": [ + { + "cursor": {"updated_at": "2024-01-20T00:00:00Z"}, + "partition": {"id": "1"}, + }, + { + "cursor": {"updated_at": "2024-01-21T05:00:00Z"}, + "partition": {"id": "2"}, + }, + ], + "use_global_cursor": False, + }, + # Expected state + { + "lookback_window": 0, + "use_global_cursor": False, + "state": {"updated_at": "2024-01-08T00:00:00Z"}, + "states": [ + {"cursor": {"updated_at": "2024-01-20T00:00:00Z"}, "partition": {"id": "1"}}, + {"cursor": {"updated_at": "2024-01-22T00:00:00Z"}, "partition": {"id": "2"}}, + {"cursor": {"updated_at": "2024-01-09T00:00:00Z"}, "partition": {"id": "3"}}, + ], + }, ), ], ) def test_incremental_error( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test with failed request. @@ -2707,261 +2994,261 @@ def test_incremental_error( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_REQUEST_OPTIONS_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", - { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( - f"https://api.example.com/community/posts" - f"?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" - ), - }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=1", - { - "comments": [ + "test_incremental_parent_state", + SUBSTREAM_REQUEST_OPTIONS_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", { - "id": 9, - "post_id": 1, - "updated_at": COMMENT_9_OLDEST, + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( + f"https://api.example.com/community/posts" + f"?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" + ), }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=1", { - "id": 10, - "post_id": 1, - "updated_at": COMMENT_10_UPDATED_AT, + "comments": [ + { + "id": 9, + "post_id": 1, + "updated_at": 
COMMENT_9_OLDEST, + }, + { + "id": 10, + "post_id": 1, + "updated_at": COMMENT_10_UPDATED_AT, + }, + { + "id": 11, + "post_id": 1, + "updated_at": COMMENT_11_UPDATED_AT, + }, + ], + "next_page": "https://api.example.com/community/posts_comments?per_page=100&post_id=1&page=2", }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=1&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts_comments_votes?per_page=100&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", { - "id": 11, - "post_id": 1, - "updated_at": COMMENT_11_UPDATED_AT, + "votes": [ + { + "id": 100, + "comment_id": 10, + "created_at": VOTE_100_CREATED_AT, + } + ], + "next_page": ( + f"https://api.example.com/community/posts_comments_votes" + f"?per_page=100&page=2&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" + ), }, - ], - "next_page": "https://api.example.com/community/posts_comments?per_page=100&post_id=1&page=2", - }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=1&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts_comments_votes?per_page=100&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - { - "votes": [ - { - "id": 100, - "comment_id": 10, - "created_at": VOTE_100_CREATED_AT, - } - ], - "next_page": ( + ), + # Fetch the second page of votes for comment 10 of post 1 + ( f"https://api.example.com/community/posts_comments_votes" - f"?per_page=100&page=2&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" - ), - }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts_comments_votes" - f"?per_page=100&page=2&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts_comments_votes" - f"?per_page=100&comment_id=11&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts_comments_votes?" 
- f"per_page=100&comment_id=12&start_time={LOOKBACK_DATE}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=2", - { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": "https://api.example.com/community/posts_comments?per_page=100&post_id=2&page=2", - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=2&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts_comments_votes" - f"?per_page=100&comment_id=20&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts_comments_votes?" - f"per_page=100&comment_id=21&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=3", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts_comments_votes?" - f"per_page=100&comment_id=30&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 300, "comment_id": 30, "created_at": VOTE_300_CREATED_AT}]}, - ), - ], - # Expected records - [ - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, - }, - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, - }, - { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, - }, - { - "comment_id": 20, - "comment_updated_at": COMMENT_20_UPDATED_AT, - "created_at": VOTE_200_CREATED_AT, - "id": 200, - }, - { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, - }, - { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": VOTE_300_CREATED_AT, - "id": 300, - }, - ], - # Initial state - { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "state": {"created_at": INITIAL_GLOBAL_CURSOR}, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, - }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], - "lookback_window": 86400, - }, - # Expected state - { - "state": {"created_at": VOTE_100_CREATED_AT}, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {"updated_at": COMMENT_10_UPDATED_AT}, # 10 is the "latest" - "parent_state": { - "posts": {"updated_at": POST_1_UPDATED_AT} - }, # post 1 is the latest - "lookback_window": 1, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": 
COMMENT_10_UPDATED_AT}, - }, - { - "partition": {"id": 2, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, - }, + f"?per_page=100&page=2&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts_comments_votes" + f"?per_page=100&comment_id=11&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts_comments_votes?" + f"per_page=100&comment_id=12&start_time={LOOKBACK_DATE}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=2", { - "partition": {"id": 3, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": "https://api.example.com/community/posts_comments?per_page=100&post_id=2&page=2", }, - ], - } - }, - "lookback_window": 1, - "use_global_cursor": False, - "states": [ + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=2&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts_comments_votes" + f"?per_page=100&comment_id=20&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts_comments_votes?" + f"per_page=100&comment_id=21&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=3", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts_comments_votes?" 
+ f"per_page=100&comment_id=30&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 300, "comment_id": 30, "created_at": VOTE_300_CREATED_AT}]}, + ), + ], + # Expected records + [ { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_100_CREATED_AT}, + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, }, { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_111_CREATED_AT}, + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, }, { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": LOOKBACK_DATE}, + "comment_id": 11, + "comment_updated_at": COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, + "id": 111, }, { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_200_CREATED_AT}, + "comment_id": 20, + "comment_updated_at": COMMENT_20_UPDATED_AT, + "created_at": VOTE_200_CREATED_AT, + "id": 200, }, { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_210_CREATED_AT}, + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, }, { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_300_CREATED_AT}, + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": VOTE_300_CREATED_AT, + "id": 300, }, ], - }, + # Initial state + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } + }, + "state": {"created_at": INITIAL_GLOBAL_CURSOR}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "lookback_window": 86400, + }, + # Expected state + { + "state": {"created_at": VOTE_100_CREATED_AT}, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {"updated_at": COMMENT_10_UPDATED_AT}, # 10 is the "latest" + "parent_state": { + "posts": {"updated_at": POST_1_UPDATED_AT} + }, # post 1 is the latest + "lookback_window": 1, + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, + }, + { + "partition": {"id": 2, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, + }, + { + "partition": {"id": 3, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, + }, + ], + } + }, + "lookback_window": 1, + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_100_CREATED_AT}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_111_CREATED_AT}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": LOOKBACK_DATE}, + }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + 
"cursor": {"created_at": VOTE_200_CREATED_AT}, + }, + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_210_CREATED_AT}, + }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_300_CREATED_AT}, + }, + ], + }, ), ], ) def test_incremental_substream_request_options_provider( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test incremental syncing for a stream that uses request options provider from parent stream config. From a1d98fbc41f1d387b5e97197b81641adc66ed1ee Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Tue, 18 Feb 2025 18:44:40 +0200 Subject: [PATCH 22/26] Fix format --- .../test_concurrent_perpartitioncursor.py | 3194 ++++++++--------- 1 file changed, 1597 insertions(+), 1597 deletions(-) diff --git a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py index 23459366d..f650847a6 100644 --- a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py +++ b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py @@ -306,7 +306,7 @@ def run_mocked_test( - mock_requests, manifest, config, stream_name, initial_state, expected_records, expected_state + mock_requests, manifest, config, stream_name, initial_state, expected_records, expected_state ): """ Helper function to mock requests, run the test, and verify the results. @@ -356,15 +356,15 @@ def run_mocked_test( [req for req in m.request_history if unquote(req.url) == unquote(url)] ) assert ( - request_count == 1 + request_count == 1 ), f"URL {url} was called {request_count} times, expected exactly once." 
def _run_read( - manifest: Mapping[str, Any], - config: Mapping[str, Any], - stream_name: str, - state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None, + manifest: Mapping[str, Any], + config: Mapping[str, Any], + stream_name: str, + state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None, ) -> EntrypointOutput: source = ConcurrentDeclarativeSource( source_config=manifest, config=config, catalog=None, state=state @@ -416,8 +416,8 @@ def _run_read( INITIAL_STATE_PARTITION_11_CURSOR.replace("Z", "") ) LOOKBACK_DATE = ( - INITIAL_GLOBAL_CURSOR_DATE - timedelta(days=LOOKBACK_WINDOW_DAYS) - ).isoformat() + "Z" + INITIAL_GLOBAL_CURSOR_DATE - timedelta(days=LOOKBACK_WINDOW_DAYS) +).isoformat() + "Z" PARTITION_SYNC_START_TIME = "2024-01-02T00:00:00Z" @@ -426,316 +426,316 @@ def _run_read( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST_NO_DEPENDENCY, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}", - { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}&page=2", - }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100", - { - "comments": [ - { - "id": 9, - "post_id": 1, - "updated_at": COMMENT_9_OLDEST, # No requests for comment 9, filtered out due to the date - }, - { - "id": 10, - "post_id": 1, - "updated_at": COMMENT_10_UPDATED_AT, - }, - { - "id": 11, - "post_id": 1, - "updated_at": COMMENT_11_UPDATED_AT, - }, - ], - "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - { - "comments": [ - { - "id": 12, - "post_id": 1, - "updated_at": COMMENT_12_UPDATED_AT, - } - ] - }, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - { - "votes": [ - { - "id": 100, - "comment_id": 10, - "created_at": VOTE_100_CREATED_AT, - } - ], - "next_page": f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - { - "votes": [ - { - "id": 101, - "comment_id": 10, - "created_at": VOTE_101_CREATED_AT, - } - ] - }, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + "test_incremental_parent_state", + SUBSTREAM_MANIFEST_NO_DEPENDENCY, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}", + { + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": 
POST_2_UPDATED_AT}, + ], + "next_page": f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}&page=2", + }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={START_DATE}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", + { + "comments": [ { - "votes": [ - { - "id": 111, - "comment_id": 11, - "created_at": VOTE_111_CREATED_AT, - } - ] + "id": 9, + "post_id": 1, + "updated_at": COMMENT_9_OLDEST, # No requests for comment 9, filtered out due to the date }, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", { - "comments": [ - { - "id": 20, - "post_id": 2, - "updated_at": COMMENT_20_UPDATED_AT, - } - ], - "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + "id": 10, + "post_id": 1, + "updated_at": COMMENT_10_UPDATED_AT, }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2", { - "comments": [ - { - "id": 21, - "post_id": 2, - "updated_at": COMMENT_21_UPDATED_AT, - } - ] + "id": 11, + "post_id": 1, + "updated_at": COMMENT_11_UPDATED_AT, }, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={LOOKBACK_DATE}", + ], + "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + { + "comments": [ { - "votes": [ - { - "id": 200, - "comment_id": 20, - "created_at": VOTE_200_CREATED_AT, - } - ] - }, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={LOOKBACK_DATE}", + "id": 12, + "post_id": 1, + "updated_at": COMMENT_12_UPDATED_AT, + } + ] + }, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + { + "votes": [ { - "votes": [ - { - "id": 210, - "comment_id": 21, - "created_at": VOTE_210_CREATED_AT, - } - ] - }, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", + "id": 100, + "comment_id": 10, + "created_at": VOTE_100_CREATED_AT, + } + ], + "next_page": f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + { + "votes": [ { - "comments": [ - { - "id": 30, - "post_id": 3, - "updated_at": COMMENT_30_UPDATED_AT, - } - ] - }, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={LOOKBACK_DATE}", + "id": 101, + "comment_id": 10, + 
"created_at": VOTE_101_CREATED_AT, + } + ] + }, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + { + "votes": [ { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] - }, - ), - ], - # Expected records - [ + "id": 111, + "comment_id": 11, + "created_at": VOTE_111_CREATED_AT, + } + ] + }, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, + "comments": [ + { + "id": 20, + "post_id": 2, + "updated_at": COMMENT_20_UPDATED_AT, + } + ], + "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2", }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2", { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, + "comments": [ + { + "id": 21, + "post_id": 2, + "updated_at": COMMENT_21_UPDATED_AT, + } + ] }, + ), + # Fetch the first page of votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={LOOKBACK_DATE}", { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, + "votes": [ + { + "id": 200, + "comment_id": 20, + "created_at": VOTE_200_CREATED_AT, + } + ] }, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={LOOKBACK_DATE}", { - "comment_id": 20, - "comment_updated_at": COMMENT_20_UPDATED_AT, - "created_at": VOTE_200_CREATED_AT, - "id": 200, + "votes": [ + { + "id": 210, + "comment_id": 21, + "created_at": VOTE_210_CREATED_AT, + } + ] }, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, + "comments": [ + { + "id": 30, + "post_id": 3, + "updated_at": COMMENT_30_UPDATED_AT, + } + ] }, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={LOOKBACK_DATE}", { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), - "id": 300, + "votes": [ + { + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] }, - ], - # Initial state + ), + ], + # Expected records + [ { - # This should not happen since parent state is disabled, but I've added this to validate that and - # incoming parent_state is ignored when the parent stream's incremental_dependency is disabled - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "states": [ - { - "partition": { - 
"id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR_TIMESTAMP}, - }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR_TIMESTAMP}, - "lookback_window": 86400, + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, }, - # Expected state { - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": VOTE_100_CREATED_AT}, + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, + }, + { + "comment_id": 11, + "comment_updated_at": COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, + "id": 111, + }, + { + "comment_id": 20, + "comment_updated_at": COMMENT_20_UPDATED_AT, + "created_at": VOTE_200_CREATED_AT, + "id": 200, + }, + { + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, + }, + { + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), + "id": 300, + }, + ], + # Initial state + { + # This should not happen since parent state is disabled, but I've added this to validate that and + # incoming parent_state is ignored when the parent stream's incremental_dependency is disabled + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } + }, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": VOTE_111_CREATED_AT}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR_TIMESTAMP}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": { - "id": 12, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": LOOKBACK_DATE}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR_TIMESTAMP}, + "lookback_window": 86400, + }, + # Expected state + { + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": { - "id": 20, - "parent_slice": {"id": 2, "parent_slice": {}}, - }, - "cursor": {"created_at": VOTE_200_CREATED_AT}, + "cursor": {"created_at": VOTE_100_CREATED_AT}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": { - "id": 21, - "parent_slice": {"id": 2, "parent_slice": {}}, - }, - "cursor": {"created_at": VOTE_210_CREATED_AT}, + "cursor": {"created_at": VOTE_111_CREATED_AT}, + }, + { + "partition": { + "id": 12, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": { - "id": 30, - "parent_slice": {"id": 3, "parent_slice": {}}, - }, - "cursor": {"created_at": VOTE_300_CREATED_AT}, + "cursor": {"created_at": LOOKBACK_DATE}, + }, + { + "partition": { + "id": 20, + "parent_slice": {"id": 2, "parent_slice": {}}, }, - ], - "use_global_cursor": False, - "lookback_window": 1, - "parent_state": {}, - "state": 
{"created_at": VOTE_100_CREATED_AT}, - }, + "cursor": {"created_at": VOTE_200_CREATED_AT}, + }, + { + "partition": { + "id": 21, + "parent_slice": {"id": 2, "parent_slice": {}}, + }, + "cursor": {"created_at": VOTE_210_CREATED_AT}, + }, + { + "partition": { + "id": 30, + "parent_slice": {"id": 3, "parent_slice": {}}, + }, + "cursor": {"created_at": VOTE_300_CREATED_AT}, + }, + ], + "use_global_cursor": False, + "lookback_window": 1, + "parent_state": {}, + "state": {"created_at": VOTE_100_CREATED_AT}, + }, ), ], ) def test_incremental_parent_state_no_incremental_dependency( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ This is a pretty complicated test that syncs a low-code connector stream with three levels of substreams @@ -761,13 +761,13 @@ def test_incremental_parent_state_no_incremental_dependency( def run_incremental_parent_state_test( - manifest, - mock_requests, - expected_records, - num_intermediate_states, - intermidiate_states, - initial_state, - expected_states, + manifest, + mock_requests, + expected_records, + num_intermediate_states, + intermidiate_states, + initial_state, + expected_states, ): """ Run an incremental parent state test for the specified stream. @@ -856,8 +856,8 @@ def run_incremental_parent_state_test( {orjson.dumps(record): record for record in expected_records}.values() ) assert ( - sorted(cumulative_records_state_deduped, key=lambda x: x["id"]) - == sorted(expected_records_set, key=lambda x: x["id"]) + sorted(cumulative_records_state_deduped, key=lambda x: x["id"]) + == sorted(expected_records_set, key=lambda x: x["id"]) ), f"Records mismatch with intermediate state {state}. Expected {expected_records}, got {cumulative_records_state_deduped}" # Store the final state after each intermediate read @@ -870,7 +870,7 @@ def run_incremental_parent_state_test( # Assert that the final state matches the expected state for all runs for i, final_state in enumerate(final_states): assert ( - final_state in expected_states + final_state in expected_states ), f"Final state mismatch at run {i + 1}. 
Expected {expected_states}, got {final_state}" @@ -1183,295 +1183,295 @@ def run_incremental_parent_state_test( "test_name, manifest, mock_requests, expected_records, num_intermediate_states, intermidiate_states, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + "test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + { + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( + f"https://api.example.com/community/posts" + f"?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" + ), + }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", + { + "comments": [ { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( - f"https://api.example.com/community/posts" - f"?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" - ), + "id": 9, + "post_id": 1, + "updated_at": COMMENT_9_OLDEST, }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100", { - "comments": [ - { - "id": 9, - "post_id": 1, - "updated_at": COMMENT_9_OLDEST, - }, - { - "id": 10, - "post_id": 1, - "updated_at": COMMENT_10_UPDATED_AT, - }, - { - "id": 11, - "post_id": 1, - "updated_at": COMMENT_11_UPDATED_AT, - }, - ], - "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + "id": 10, + "post_id": 1, + "updated_at": COMMENT_10_UPDATED_AT, }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", { - "votes": [ - { - "id": 100, - "comment_id": 10, - "created_at": VOTE_100_CREATED_AT, - } - ], - "next_page": ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" - ), + "id": 11, + "post_id": 1, + "updated_at": COMMENT_11_UPDATED_AT, }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": [{"id": 111, "comment_id": 11, "created_at": 
VOTE_111_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", + ], + "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + { + "votes": [ { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={LOOKBACK_DATE}", + "id": 100, + "comment_id": 10, + "created_at": VOTE_100_CREATED_AT, + } + ], + "next_page": ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" + ), + }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", + { + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + }, + ), + # Fetch the second page of comments for post 2 + ( + 
"https://api.example.com/community/posts/2/comments?per_page=100&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={LOOKBACK_DATE}", + { + "votes": [ { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] - }, - ), - # Requests with intermediate states - # Fetch votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={VOTE_100_CREATED_AT}", + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] + }, + ), + # Requests with intermediate states + # Fetch votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes?per_page=100&start_time={VOTE_100_CREATED_AT}", + { + "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], + }, + ), + # Fetch votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes?per_page=100&start_time={VOTE_111_CREATED_AT}", + { + "votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}], + }, + ), + # Fetch votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={VOTE_111_CREATED_AT}", + { + "votes": [], + }, + ), + # Fetch votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={VOTE_200_CREATED_AT}", + {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, + ), + # Fetch votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={VOTE_210_CREATED_AT}", + {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={VOTE_300_CREATED_AT}", + { + "votes": [ { - "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], - }, - ), - # Fetch votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes?per_page=100&start_time={VOTE_111_CREATED_AT}", + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] + }, + ), + ], + # Expected records + [ + { + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, + }, + { + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, + }, + { + "comment_id": 11, + "comment_updated_at": 
COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, + "id": 111, + }, + { + "comment_id": 20, + "comment_updated_at": COMMENT_20_UPDATED_AT, + "created_at": VOTE_200_CREATED_AT, + "id": 200, + }, + { + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, + }, + { + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), + "id": 300, + }, + ], + # Number of intermediate states - 6 as number of parent partitions + 6, + # Intermediate states + INTERMEDIATE_STATES, + # Initial state + INITIAL_STATE, + # Expected state + { + "state": {"created_at": VOTE_100_CREATED_AT}, + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {"updated_at": COMMENT_10_UPDATED_AT}, # 10 is the "latest" + "parent_state": { + "posts": {"updated_at": POST_1_UPDATED_AT} + }, # post 1 is the latest + "lookback_window": 1, + "states": [ { - "votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}], + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, }, - ), - # Fetch votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={VOTE_111_CREATED_AT}", { - "votes": [], + "partition": {"id": 2, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, }, - ), - # Fetch votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes?per_page=100&start_time={VOTE_200_CREATED_AT}", - {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, - ), - # Fetch votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes?per_page=100&start_time={VOTE_210_CREATED_AT}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes?per_page=100&start_time={VOTE_300_CREATED_AT}", { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] + "partition": {"id": 3, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, }, - ), - ], - # Expected records - [ + ], + } + }, + "lookback_window": 1, + "use_global_cursor": False, + "states": [ { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_100_CREATED_AT}, }, { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_111_CREATED_AT}, }, { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": LOOKBACK_DATE}, }, { - "comment_id": 20, - "comment_updated_at": COMMENT_20_UPDATED_AT, - "created_at": VOTE_200_CREATED_AT, - "id": 200, + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_200_CREATED_AT}, }, { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, + "partition": {"id": 21, "parent_slice": {"id": 2, 
"parent_slice": {}}}, + "cursor": {"created_at": VOTE_210_CREATED_AT}, }, { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), - "id": 300, + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_300_CREATED_AT}, }, ], - # Number of intermediate states - 6 as number of parent partitions - 6, - # Intermediate states - INTERMEDIATE_STATES, - # Initial state - INITIAL_STATE, - # Expected state - { - "state": {"created_at": VOTE_100_CREATED_AT}, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {"updated_at": COMMENT_10_UPDATED_AT}, # 10 is the "latest" - "parent_state": { - "posts": {"updated_at": POST_1_UPDATED_AT} - }, # post 1 is the latest - "lookback_window": 1, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, - }, - { - "partition": {"id": 2, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, - }, - { - "partition": {"id": 3, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, - }, - ], - } - }, - "lookback_window": 1, - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_100_CREATED_AT}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_111_CREATED_AT}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": LOOKBACK_DATE}, - }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_200_CREATED_AT}, - }, - { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_210_CREATED_AT}, - }, - { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_300_CREATED_AT}, - }, - ], - }, + }, ), ], ) def test_incremental_parent_state( - test_name, - manifest, - mock_requests, - expected_records, - num_intermediate_states, - intermidiate_states, - initial_state, - expected_state, + test_name, + manifest, + mock_requests, + expected_records, + num_intermediate_states, + intermidiate_states, + initial_state, + expected_state, ): # Patch `_throttle_state_message` so it always returns a float (indicating "no throttle") with patch.object( - ConcurrentPerPartitionCursor, "_throttle_state_message", return_value=9999999.0 + ConcurrentPerPartitionCursor, "_throttle_state_message", return_value=9999999.0 ): run_incremental_parent_state_test( manifest, @@ -1546,208 +1546,208 @@ def test_incremental_parent_state( "test_name, manifest, mock_requests, expected_records", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( - f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARTITION_SYNC_START_TIME}&page=2" - ), - }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARTITION_SYNC_START_TIME}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - 
"https://api.example.com/community/posts/1/comments?per_page=100", - { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, - {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, - {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, - ], - "next_page": ( - "https://api.example.com/community/posts/1/comments" - "?per_page=100&page=2" - ), - }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - { - "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], - "next_page": ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={PARTITION_SYNC_START_TIME}" - ), - }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={PARTITION_SYNC_START_TIME}", - {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", - { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": ( - "https://api.example.com/community/posts/2/comments" - "?per_page=100&page=2" - ), - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes" - f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", - { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] - }, - ), - ], - # Expected records - [ - { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, - }, + 
"test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARTITION_SYNC_START_TIME}", { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( + f"https://api.example.com/community/posts?per_page=100" + f"&start_time={PARTITION_SYNC_START_TIME}&page=2" + ), }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100" + f"&start_time={PARTITION_SYNC_START_TIME}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, + "comments": [ + {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, + {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, + {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, + ], + "next_page": ( + "https://api.example.com/community/posts/1/comments" + "?per_page=100&page=2" + ), }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", { - "comment_id": 20, - "comment_updated_at": COMMENT_20_UPDATED_AT, - "created_at": VOTE_200_CREATED_AT, - "id": 200, + "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], + "next_page": ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={PARTITION_SYNC_START_TIME}" + ), }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={PARTITION_SYNC_START_TIME}", + {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": ( + "https://api.example.com/community/posts/2/comments" + "?per_page=100&page=2" + ), }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 
of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", + {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes" + f"?per_page=100&start_time={PARTITION_SYNC_START_TIME}", { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), - "id": 300, + "votes": [ + { + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] }, - ], - ), - ], -) + ), + ], + # Expected records + [ + { + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, + }, + { + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, + }, + { + "comment_id": 11, + "comment_updated_at": COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, + "id": 111, + }, + { + "comment_id": 20, + "comment_updated_at": COMMENT_20_UPDATED_AT, + "created_at": VOTE_200_CREATED_AT, + "id": 200, + }, + { + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, + }, + { + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), + "id": 300, + }, + ], + ), + ], +) @pytest.mark.parametrize( "initial_state, expected_state", [ ({"created_at": PARTITION_SYNC_START_TIME}, STATE_MIGRATION_EXPECTED_STATE), ( - { - "state": {"created_at": PARTITION_SYNC_START_TIME}, - "lookback_window": 0, - "use_global_cursor": False, - "parent_state": { - "post_comments": { - "state": {"updated_at": PARTITION_SYNC_START_TIME}, - "parent_state": {"posts": {"updated_at": PARTITION_SYNC_START_TIME}}, - "lookback_window": 0, - } - }, + { + "state": {"created_at": PARTITION_SYNC_START_TIME}, + "lookback_window": 0, + "use_global_cursor": False, + "parent_state": { + "post_comments": { + "state": {"updated_at": PARTITION_SYNC_START_TIME}, + "parent_state": {"posts": {"updated_at": PARTITION_SYNC_START_TIME}}, + "lookback_window": 0, + } }, - STATE_MIGRATION_EXPECTED_STATE, + }, + STATE_MIGRATION_EXPECTED_STATE, ), ( - { - "state": {"created_at": PARTITION_SYNC_START_TIME}, - "lookback_window": 0, - "use_global_cursor": True, - "parent_state": { - "post_comments": { - "state": {"updated_at": PARTITION_SYNC_START_TIME}, - "parent_state": {"posts": {"updated_at": PARTITION_SYNC_START_TIME}}, - "lookback_window": 0, - } - }, + { + "state": {"created_at": PARTITION_SYNC_START_TIME}, + "lookback_window": 0, + "use_global_cursor": True, + "parent_state": { + "post_comments": { + "state": {"updated_at": PARTITION_SYNC_START_TIME}, + "parent_state": {"posts": {"updated_at": PARTITION_SYNC_START_TIME}}, + "lookback_window": 0, + } }, - STATE_MIGRATION_GLOBAL_EXPECTED_STATE, + }, + STATE_MIGRATION_GLOBAL_EXPECTED_STATE, ), ( - { - "state": 
{"created_at": PARTITION_SYNC_START_TIME}, - }, - STATE_MIGRATION_EXPECTED_STATE, + { + "state": {"created_at": PARTITION_SYNC_START_TIME}, + }, + STATE_MIGRATION_EXPECTED_STATE, ), ], ids=[ @@ -1758,7 +1758,7 @@ def test_incremental_parent_state( ], ) def test_incremental_parent_state_migration( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test incremental partition router with parent state migration @@ -1778,101 +1778,101 @@ def test_incremental_parent_state_migration( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", - { - "posts": [], - "next_page": ( - f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARENT_POSTS_CURSOR}&page=2" - ), - }, - ), - # Fetch the second page of posts - ( + "test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + { + "posts": [], + "next_page": ( f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": []}, - ), - ], - # Expected records (empty) - [], - # Initial state - { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } + f"&start_time={PARENT_POSTS_CURSOR}&page=2" + ), }, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100" + f"&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": []}, + ), + ], + # Expected records (empty) + [], + # Initial state + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } + }, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - ], - "state": {"created_at": INITIAL_GLOBAL_CURSOR}, - "lookback_window": 1, - }, - # Expected state - { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "state": {}, - "use_global_cursor": False, - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, }, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + ], + "state": {"created_at": INITIAL_GLOBAL_CURSOR}, + 
"lookback_window": 1, + }, + # Expected state + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "state": {}, + "use_global_cursor": False, + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } + }, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - ], - "state": {"created_at": INITIAL_GLOBAL_CURSOR}, - "lookback_window": 1, - "use_global_cursor": False, - }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "state": {"created_at": INITIAL_GLOBAL_CURSOR}, + "lookback_window": 1, + "use_global_cursor": False, + }, ), ], ) def test_incremental_parent_state_no_slices( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test incremental partition router with no parent records @@ -1892,217 +1892,217 @@ def test_incremental_parent_state_no_slices( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", - { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( - f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARENT_POSTS_CURSOR}&page=2" - ), - }, - ), - # Fetch the second page of posts - ( + "test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + { + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( f"https://api.example.com/community/posts?per_page=100" - f"&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100", - { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, - {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, - {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, - ], - "next_page": ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2" - ), - }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - { - "votes": [], - "next_page": ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" - ), - }, - ), - # Fetch the second page of votes for comment 
10 of post 1 - ( + f"&start_time={PARENT_POSTS_CURSOR}&page=2" + ), + }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100" + f"&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", + { + "comments": [ + {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, + {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, + {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, + ], + "next_page": ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2" + ), + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + { + "votes": [], + "next_page": ( f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", - { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2" - ), - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/20/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts/3/comments/30/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": []}, - ), - ], - # Expected records - [], - # Initial state - { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" + ), }, - "states": [ - { - 
"partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, - }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], - "use_global_cursor": False, - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - "lookback_window": 0, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", + { + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2" + ), + }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/20/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": []}, + ), + ], + # Expected records + [], + # Initial state + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } }, - # Expected state - { - "lookback_window": 1, - "use_global_cursor": False, - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, - }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": 
{}}}, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "use_global_cursor": False, + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "lookback_window": 0, + }, + # Expected state + { + "lookback_window": 1, + "use_global_cursor": False, + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - ], - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {"updated_at": COMMENT_10_UPDATED_AT}, - "parent_state": {"posts": {"updated_at": POST_1_UPDATED_AT}}, - "lookback_window": 1, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, - }, - { - "partition": {"id": 2, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, - }, - { - "partition": {"id": 3, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, - }, - ], - } + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "parent_state": { + "post_comments": { + "use_global_cursor": False, + "state": {"updated_at": COMMENT_10_UPDATED_AT}, + "parent_state": {"posts": {"updated_at": POST_1_UPDATED_AT}}, + "lookback_window": 1, + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, + }, + { + "partition": {"id": 2, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, + }, + { + "partition": {"id": 3, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, + }, + ], + } }, + }, ), ], ) def test_incremental_parent_state_no_records( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test incremental partition router with no child records @@ -2122,238 +2122,238 @@ def test_incremental_parent_state_no_records( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", - { - "posts": [ - {"id": 1, "updated_at": 
POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" - ), - }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100", - { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, - {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, - {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, - ], - "next_page": ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2" - ), - }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - { - "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], - "next_page": ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" - ), - }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/10/votes" - f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/11/votes" - f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100", - { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2" - ), - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - 404 error - ( - f"https://api.example.com/community/posts/2/comments/20/votes" - f"?per_page=100&start_time={LOOKBACK_DATE}", - None, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts/2/comments/21/votes" - f"?per_page=100&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - 
f"https://api.example.com/community/posts/3/comments/30/votes" - f"?per_page=100&start_time={LOOKBACK_DATE}", - { - "votes": [ - { - "id": 300, - "comment_id": 30, - "created_at": VOTE_300_CREATED_AT_TIMESTAMP, - } - ] - }, - ), - ], - # Expected records - [ + "test_incremental_parent_state", + SUBSTREAM_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" + ), }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100", { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, + "comments": [ + {"id": 9, "post_id": 1, "updated_at": COMMENT_9_OLDEST}, + {"id": 10, "post_id": 1, "updated_at": COMMENT_10_UPDATED_AT}, + {"id": 11, "post_id": 1, "updated_at": COMMENT_11_UPDATED_AT}, + ], + "next_page": ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2" + ), }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, + "votes": [{"id": 100, "comment_id": 10, "created_at": VOTE_100_CREATED_AT}], + "next_page": ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" + ), }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/10/votes" + f"?per_page=100&page=2&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/11/votes" + f"?per_page=100&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts/1/comments/12/votes?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100", { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2" + ), }, + ), + # Fetch the second page of 
comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 of post 2 - 404 error + ( + f"https://api.example.com/community/posts/2/comments/20/votes" + f"?per_page=100&start_time={LOOKBACK_DATE}", + None, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts/2/comments/21/votes" + f"?per_page=100&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts/3/comments/30/votes" + f"?per_page=100&start_time={LOOKBACK_DATE}", { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), - "id": 300, + "votes": [ + { + "id": 300, + "comment_id": 30, + "created_at": VOTE_300_CREATED_AT_TIMESTAMP, + } + ] }, - ], - # Initial state + ), + ], + # Expected records + [ { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - "lookback_window": 86400, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, - }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, }, - # Expected state { - # The global state, lookback window and the parent state are the same because sync failed for comment 20 - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "state": {}, - "use_global_cursor": False, - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - "lookback_window": 86400, - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_100_CREATED_AT}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_111_CREATED_AT}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": LOOKBACK_DATE}, - }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": LOOKBACK_DATE}, - }, - { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_210_CREATED_AT}, + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, + }, + { + "comment_id": 11, + "comment_updated_at": COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, 
+ "id": 111, + }, + { + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, + }, + { + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": str(VOTE_300_CREATED_AT_TIMESTAMP), + "id": 300, + }, + ], + # Initial state + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } + }, + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "lookback_window": 86400, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_300_CREATED_AT}, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, }, - ], + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + }, + # Expected state + { + # The global state, lookback window and the parent state are the same because sync failed for comment 20 + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "state": {}, + "use_global_cursor": False, + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } }, + "state": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + "lookback_window": 86400, + "use_global_cursor": False, + "states": [ + { + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_100_CREATED_AT}, + }, + { + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_111_CREATED_AT}, + }, + { + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": LOOKBACK_DATE}, + }, + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": LOOKBACK_DATE}, + }, + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_210_CREATED_AT}, + }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_300_CREATED_AT}, + }, + ], + }, ), ], ) def test_incremental_substream_error( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): run_mocked_test( mock_requests, @@ -2524,85 +2524,85 @@ def test_incremental_substream_error( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - LISTPARTITION_MANIFEST, - [ - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&start_time=2024-01-24T00:00:00Z", - { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": "2023-01-01T00:00:00Z"}, - {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, - {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, - ], - "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-24T00:00:00Z", - }, - ), - # Fetch the second page of comments for post 1 - ( - 
"https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-24T00:00:00Z", - {"comments": [{"id": 12, "post_id": 1, "updated_at": "2024-01-23T00:00:00Z"}]}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&start_time=2024-01-21T05:00:00Z", - { - "comments": [ - {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"} - ], - "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", - {"comments": [{"id": 21, "post_id": 2, "updated_at": "2024-01-21T00:00:00Z"}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100&start_time=2024-01-08T00:00:00Z", - {"comments": [{"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}]}, - ), + "test_incremental_parent_state", + LISTPARTITION_MANIFEST, + [ + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&start_time=2024-01-24T00:00:00Z", + { + "comments": [ + {"id": 9, "post_id": 1, "updated_at": "2023-01-01T00:00:00Z"}, + {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, + {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, + ], + "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-24T00:00:00Z", + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-24T00:00:00Z", + {"comments": [{"id": 12, "post_id": 1, "updated_at": "2024-01-23T00:00:00Z"}]}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&start_time=2024-01-21T05:00:00Z", + { + "comments": [ + {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"} + ], + "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", + }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", + {"comments": [{"id": 21, "post_id": 2, "updated_at": "2024-01-21T00:00:00Z"}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100&start_time=2024-01-08T00:00:00Z", + {"comments": [{"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}]}, + ), + ], + # Expected records + [ + {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, + {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, + {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"}, + {"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}, + ], + # Initial state + { + "state": {"updated_at": "2024-01-08T00:00:00Z"}, + "states": [ + { + "cursor": {"updated_at": "2024-01-24T00:00:00Z"}, + "partition": {"id": "1"}, + }, + { + "cursor": {"updated_at": "2024-01-21T05:00:00Z"}, + "partition": {"id": "2"}, + }, ], - # Expected records - [ - {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, - {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, - {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"}, - {"id": 30, "post_id": 3, 
"updated_at": "2024-01-09T00:00:00Z"}, + "use_global_cursor": False, + }, + # Expected state + { + "use_global_cursor": False, + "lookback_window": 1, + "state": {"updated_at": "2024-01-25T00:00:00Z"}, + "states": [ + {"cursor": {"updated_at": "2024-01-25T00:00:00Z"}, "partition": {"id": "1"}}, + {"cursor": {"updated_at": "2024-01-22T00:00:00Z"}, "partition": {"id": "2"}}, + {"cursor": {"updated_at": "2024-01-09T00:00:00Z"}, "partition": {"id": "3"}}, ], - # Initial state - { - "state": {"updated_at": "2024-01-08T00:00:00Z"}, - "states": [ - { - "cursor": {"updated_at": "2024-01-24T00:00:00Z"}, - "partition": {"id": "1"}, - }, - { - "cursor": {"updated_at": "2024-01-21T05:00:00Z"}, - "partition": {"id": "2"}, - }, - ], - "use_global_cursor": False, - }, - # Expected state - { - "use_global_cursor": False, - "lookback_window": 1, - "state": {"updated_at": "2024-01-25T00:00:00Z"}, - "states": [ - {"cursor": {"updated_at": "2024-01-25T00:00:00Z"}, "partition": {"id": "1"}}, - {"cursor": {"updated_at": "2024-01-22T00:00:00Z"}, "partition": {"id": "2"}}, - {"cursor": {"updated_at": "2024-01-09T00:00:00Z"}, "partition": {"id": "3"}}, - ], - }, + }, ), ], ) def test_incremental_list_partition_router( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test ConcurrentPerPartitionCursor with ListPartitionRouter @@ -2622,85 +2622,85 @@ def test_incremental_list_partition_router( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_error_handling", - LISTPARTITION_MANIFEST, - [ - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&start_time=2024-01-20T00:00:00Z", - { - "comments": [ - {"id": 9, "post_id": 1, "updated_at": "2023-01-01T00:00:00Z"}, - {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, - {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, - ], - "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-20T00:00:00Z", - }, - ), - # Error response for the second page of comments for post 1 - ( - "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-20T00:00:00Z", - None, # Simulate a network error or an empty response - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&start_time=2024-01-21T05:00:00Z", - { - "comments": [ - {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"} - ], - "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", - {"comments": [{"id": 21, "post_id": 2, "updated_at": "2024-01-21T00:00:00Z"}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts/3/comments?per_page=100&start_time=2024-01-08T00:00:00Z", - {"comments": [{"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}]}, - ), + "test_incremental_error_handling", + LISTPARTITION_MANIFEST, + [ + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&start_time=2024-01-20T00:00:00Z", + { + "comments": [ + {"id": 9, "post_id": 1, 
"updated_at": "2023-01-01T00:00:00Z"}, + {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, + {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, + ], + "next_page": "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-20T00:00:00Z", + }, + ), + # Error response for the second page of comments for post 1 + ( + "https://api.example.com/community/posts/1/comments?per_page=100&page=2&start_time=2024-01-20T00:00:00Z", + None, # Simulate a network error or an empty response + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&start_time=2024-01-21T05:00:00Z", + { + "comments": [ + {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"} + ], + "next_page": "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", + }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts/2/comments?per_page=100&page=2&start_time=2024-01-21T05:00:00Z", + {"comments": [{"id": 21, "post_id": 2, "updated_at": "2024-01-21T00:00:00Z"}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts/3/comments?per_page=100&start_time=2024-01-08T00:00:00Z", + {"comments": [{"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}]}, + ), + ], + # Expected records + [ + {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, + {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, + {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"}, + {"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}, + ], + # Initial state + { + "state": {"updated_at": "2024-01-08T00:00:00Z"}, + "states": [ + { + "cursor": {"updated_at": "2024-01-20T00:00:00Z"}, + "partition": {"id": "1"}, + }, + { + "cursor": {"updated_at": "2024-01-21T05:00:00Z"}, + "partition": {"id": "2"}, + }, ], - # Expected records - [ - {"id": 10, "post_id": 1, "updated_at": "2024-01-25T00:00:00Z"}, - {"id": 11, "post_id": 1, "updated_at": "2024-01-24T00:00:00Z"}, - {"id": 20, "post_id": 2, "updated_at": "2024-01-22T00:00:00Z"}, - {"id": 30, "post_id": 3, "updated_at": "2024-01-09T00:00:00Z"}, + "use_global_cursor": False, + }, + # Expected state + { + "lookback_window": 0, + "use_global_cursor": False, + "state": {"updated_at": "2024-01-08T00:00:00Z"}, + "states": [ + {"cursor": {"updated_at": "2024-01-20T00:00:00Z"}, "partition": {"id": "1"}}, + {"cursor": {"updated_at": "2024-01-22T00:00:00Z"}, "partition": {"id": "2"}}, + {"cursor": {"updated_at": "2024-01-09T00:00:00Z"}, "partition": {"id": "3"}}, ], - # Initial state - { - "state": {"updated_at": "2024-01-08T00:00:00Z"}, - "states": [ - { - "cursor": {"updated_at": "2024-01-20T00:00:00Z"}, - "partition": {"id": "1"}, - }, - { - "cursor": {"updated_at": "2024-01-21T05:00:00Z"}, - "partition": {"id": "2"}, - }, - ], - "use_global_cursor": False, - }, - # Expected state - { - "lookback_window": 0, - "use_global_cursor": False, - "state": {"updated_at": "2024-01-08T00:00:00Z"}, - "states": [ - {"cursor": {"updated_at": "2024-01-20T00:00:00Z"}, "partition": {"id": "1"}}, - {"cursor": {"updated_at": "2024-01-22T00:00:00Z"}, "partition": {"id": "2"}}, - {"cursor": {"updated_at": "2024-01-09T00:00:00Z"}, "partition": {"id": "3"}}, - ], - }, + }, ), ], ) def test_incremental_error( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, 
expected_records, initial_state, expected_state ): """ Test with failed request. @@ -2994,261 +2994,261 @@ def test_incremental_error( "test_name, manifest, mock_requests, expected_records, initial_state, expected_state", [ ( - "test_incremental_parent_state", - SUBSTREAM_REQUEST_OPTIONS_MANIFEST, - [ - # Fetch the first page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + "test_incremental_parent_state", + SUBSTREAM_REQUEST_OPTIONS_MANIFEST, + [ + # Fetch the first page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}", + { + "posts": [ + {"id": 1, "updated_at": POST_1_UPDATED_AT}, + {"id": 2, "updated_at": POST_2_UPDATED_AT}, + ], + "next_page": ( + f"https://api.example.com/community/posts" + f"?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" + ), + }, + ), + # Fetch the second page of posts + ( + f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", + {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, + ), + # Fetch the first page of comments for post 1 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=1", + { + "comments": [ { - "posts": [ - {"id": 1, "updated_at": POST_1_UPDATED_AT}, - {"id": 2, "updated_at": POST_2_UPDATED_AT}, - ], - "next_page": ( - f"https://api.example.com/community/posts" - f"?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2" - ), + "id": 9, + "post_id": 1, + "updated_at": COMMENT_9_OLDEST, }, - ), - # Fetch the second page of posts - ( - f"https://api.example.com/community/posts?per_page=100&start_time={PARENT_POSTS_CURSOR}&page=2", - {"posts": [{"id": 3, "updated_at": POST_3_UPDATED_AT}]}, - ), - # Fetch the first page of comments for post 1 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=1", { - "comments": [ - { - "id": 9, - "post_id": 1, - "updated_at": COMMENT_9_OLDEST, - }, - { - "id": 10, - "post_id": 1, - "updated_at": COMMENT_10_UPDATED_AT, - }, - { - "id": 11, - "post_id": 1, - "updated_at": COMMENT_11_UPDATED_AT, - }, - ], - "next_page": "https://api.example.com/community/posts_comments?per_page=100&post_id=1&page=2", + "id": 10, + "post_id": 1, + "updated_at": COMMENT_10_UPDATED_AT, }, - ), - # Fetch the second page of comments for post 1 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=1&page=2", - {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts_comments_votes?per_page=100&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", { - "votes": [ - { - "id": 100, - "comment_id": 10, - "created_at": VOTE_100_CREATED_AT, - } - ], - "next_page": ( - f"https://api.example.com/community/posts_comments_votes" - f"?per_page=100&page=2&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" - ), + "id": 11, + "post_id": 1, + "updated_at": COMMENT_11_UPDATED_AT, }, - ), - # Fetch the second page of votes for comment 10 of post 1 - ( - f"https://api.example.com/community/posts_comments_votes" - f"?per_page=100&page=2&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", - {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 11 of post 1 - ( - f"https://api.example.com/community/posts_comments_votes" - 
f"?per_page=100&comment_id=11&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", - {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 12 of post 1 - ( - f"https://api.example.com/community/posts_comments_votes?" - f"per_page=100&comment_id=12&start_time={LOOKBACK_DATE}", - {"votes": []}, - ), - # Fetch the first page of comments for post 2 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=2", + ], + "next_page": "https://api.example.com/community/posts_comments?per_page=100&post_id=1&page=2", + }, + ), + # Fetch the second page of comments for post 1 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=1&page=2", + {"comments": [{"id": 12, "post_id": 1, "updated_at": COMMENT_12_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts_comments_votes?per_page=100&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + { + "votes": [ { - "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], - "next_page": "https://api.example.com/community/posts_comments?per_page=100&post_id=2&page=2", - }, - ), - # Fetch the second page of comments for post 2 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=2&page=2", - {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 20 of post 2 - ( + "id": 100, + "comment_id": 10, + "created_at": VOTE_100_CREATED_AT, + } + ], + "next_page": ( f"https://api.example.com/community/posts_comments_votes" - f"?per_page=100&comment_id=20&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, - ), - # Fetch the first page of votes for comment 21 of post 2 - ( - f"https://api.example.com/community/posts_comments_votes?" - f"per_page=100&comment_id=21&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, - ), - # Fetch the first page of comments for post 3 - ( - "https://api.example.com/community/posts_comments?per_page=100&post_id=3", - {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, - ), - # Fetch the first page of votes for comment 30 of post 3 - ( - f"https://api.example.com/community/posts_comments_votes?" - f"per_page=100&comment_id=30&start_time={LOOKBACK_DATE}", - {"votes": [{"id": 300, "comment_id": 30, "created_at": VOTE_300_CREATED_AT}]}, - ), - ], - # Expected records - [ + f"?per_page=100&page=2&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}" + ), + }, + ), + # Fetch the second page of votes for comment 10 of post 1 + ( + f"https://api.example.com/community/posts_comments_votes" + f"?per_page=100&page=2&comment_id=10&start_time={INITIAL_STATE_PARTITION_10_CURSOR}", + {"votes": [{"id": 101, "comment_id": 10, "created_at": VOTE_101_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 11 of post 1 + ( + f"https://api.example.com/community/posts_comments_votes" + f"?per_page=100&comment_id=11&start_time={INITIAL_STATE_PARTITION_11_CURSOR}", + {"votes": [{"id": 111, "comment_id": 11, "created_at": VOTE_111_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 12 of post 1 + ( + f"https://api.example.com/community/posts_comments_votes?" 
+ f"per_page=100&comment_id=12&start_time={LOOKBACK_DATE}", + {"votes": []}, + ), + # Fetch the first page of comments for post 2 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=2", { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_100_CREATED_AT, - "id": 100, + "comments": [{"id": 20, "post_id": 2, "updated_at": COMMENT_20_UPDATED_AT}], + "next_page": "https://api.example.com/community/posts_comments?per_page=100&post_id=2&page=2", }, + ), + # Fetch the second page of comments for post 2 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=2&page=2", + {"comments": [{"id": 21, "post_id": 2, "updated_at": COMMENT_21_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 20 of post 2 + ( + f"https://api.example.com/community/posts_comments_votes" + f"?per_page=100&comment_id=20&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 200, "comment_id": 20, "created_at": VOTE_200_CREATED_AT}]}, + ), + # Fetch the first page of votes for comment 21 of post 2 + ( + f"https://api.example.com/community/posts_comments_votes?" + f"per_page=100&comment_id=21&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 210, "comment_id": 21, "created_at": VOTE_210_CREATED_AT}]}, + ), + # Fetch the first page of comments for post 3 + ( + "https://api.example.com/community/posts_comments?per_page=100&post_id=3", + {"comments": [{"id": 30, "post_id": 3, "updated_at": COMMENT_30_UPDATED_AT}]}, + ), + # Fetch the first page of votes for comment 30 of post 3 + ( + f"https://api.example.com/community/posts_comments_votes?" + f"per_page=100&comment_id=30&start_time={LOOKBACK_DATE}", + {"votes": [{"id": 300, "comment_id": 30, "created_at": VOTE_300_CREATED_AT}]}, + ), + ], + # Expected records + [ + { + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_100_CREATED_AT, + "id": 100, + }, + { + "comment_id": 10, + "comment_updated_at": COMMENT_10_UPDATED_AT, + "created_at": VOTE_101_CREATED_AT, + "id": 101, + }, + { + "comment_id": 11, + "comment_updated_at": COMMENT_11_UPDATED_AT, + "created_at": VOTE_111_CREATED_AT, + "id": 111, + }, + { + "comment_id": 20, + "comment_updated_at": COMMENT_20_UPDATED_AT, + "created_at": VOTE_200_CREATED_AT, + "id": 200, + }, + { + "comment_id": 21, + "comment_updated_at": COMMENT_21_UPDATED_AT, + "created_at": VOTE_210_CREATED_AT, + "id": 210, + }, + { + "comment_id": 30, + "comment_updated_at": COMMENT_30_UPDATED_AT, + "created_at": VOTE_300_CREATED_AT, + "id": 300, + }, + ], + # Initial state + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } + }, + "state": {"created_at": INITIAL_GLOBAL_CURSOR}, + "states": [ { - "comment_id": 10, - "comment_updated_at": COMMENT_10_UPDATED_AT, - "created_at": VOTE_101_CREATED_AT, - "id": 101, + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, }, { - "comment_id": 11, - "comment_updated_at": COMMENT_11_UPDATED_AT, - "created_at": VOTE_111_CREATED_AT, - "id": 111, + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, }, + ], + "lookback_window": 86400, + }, + # Expected state + { + "state": {"created_at": VOTE_100_CREATED_AT}, + "parent_state": 
{ + "post_comments": { + "use_global_cursor": False, + "state": {"updated_at": COMMENT_10_UPDATED_AT}, # 10 is the "latest" + "parent_state": { + "posts": {"updated_at": POST_1_UPDATED_AT} + }, # post 1 is the latest + "lookback_window": 1, + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, + }, + { + "partition": {"id": 2, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, + }, + { + "partition": {"id": 3, "parent_slice": {}}, + "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, + }, + ], + } + }, + "lookback_window": 1, + "use_global_cursor": False, + "states": [ { - "comment_id": 20, - "comment_updated_at": COMMENT_20_UPDATED_AT, - "created_at": VOTE_200_CREATED_AT, - "id": 200, + "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_100_CREATED_AT}, }, { - "comment_id": 21, - "comment_updated_at": COMMENT_21_UPDATED_AT, - "created_at": VOTE_210_CREATED_AT, - "id": 210, + "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_111_CREATED_AT}, }, { - "comment_id": 30, - "comment_updated_at": COMMENT_30_UPDATED_AT, - "created_at": VOTE_300_CREATED_AT, - "id": 300, + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": LOOKBACK_DATE}, }, - ], - # Initial state - { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } + { + "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_200_CREATED_AT}, }, - "state": {"created_at": INITIAL_GLOBAL_CURSOR}, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, - }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], - "lookback_window": 86400, - }, - # Expected state - { - "state": {"created_at": VOTE_100_CREATED_AT}, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {"updated_at": COMMENT_10_UPDATED_AT}, # 10 is the "latest" - "parent_state": { - "posts": {"updated_at": POST_1_UPDATED_AT} - }, # post 1 is the latest - "lookback_window": 1, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_10_UPDATED_AT}, - }, - { - "partition": {"id": 2, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_20_UPDATED_AT}, - }, - { - "partition": {"id": 3, "parent_slice": {}}, - "cursor": {"updated_at": COMMENT_30_UPDATED_AT}, - }, - ], - } + { + "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_210_CREATED_AT}, }, - "lookback_window": 1, - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_100_CREATED_AT}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_111_CREATED_AT}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": LOOKBACK_DATE}, - }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": 
{"created_at": VOTE_200_CREATED_AT}, - }, - { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_210_CREATED_AT}, - }, - { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": VOTE_300_CREATED_AT}, - }, - ], - }, + { + "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, + "cursor": {"created_at": VOTE_300_CREATED_AT}, + }, + ], + }, ), ], ) def test_incremental_substream_request_options_provider( - test_name, manifest, mock_requests, expected_records, initial_state, expected_state + test_name, manifest, mock_requests, expected_records, initial_state, expected_state ): """ Test incremental syncing for a stream that uses request options provider from parent stream config. From eff25eccca6826168979384174f38a7be92f3f64 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Wed, 19 Feb 2025 18:49:08 +0200 Subject: [PATCH 23/26] Add unit tests --- .../test_concurrent_perpartitioncursor.py | 616 +++++++++--------- 1 file changed, 297 insertions(+), 319 deletions(-) diff --git a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py index f650847a6..084c31142 100644 --- a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py +++ b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py @@ -20,6 +20,14 @@ ConcurrentDeclarativeSource, ) from airbyte_cdk.sources.declarative.incremental import ConcurrentPerPartitionCursor +from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import ( + DeclarativePartition, +) +from airbyte_cdk.sources.streams.concurrent.cursor import CursorField +from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import ( + CustomFormatConcurrentStreamStateConverter, +) +from airbyte_cdk.sources.types import StreamSlice from airbyte_cdk.test.catalog_builder import CatalogBuilder, ConfiguredAirbyteStreamBuilder from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read @@ -765,7 +773,6 @@ def run_incremental_parent_state_test( mock_requests, expected_records, num_intermediate_states, - intermidiate_states, initial_state, expected_states, ): @@ -785,7 +792,6 @@ def run_incremental_parent_state_test( mock_requests (list): A list of tuples containing URL and response data for mocking API requests. expected_records (list): The expected records to compare against the output. num_intermediate_states (int): The number of intermediate states to expect. - intermidiate_states (list): A list of intermediate states to assert initial_state (list): The initial state to start the read operation. expected_states (list): A list of expected final states after the read operation. 
""" @@ -832,12 +838,6 @@ def run_incremental_parent_state_test( # Assert that the number of intermediate states is as expected assert len(intermediate_states) - 1 == num_intermediate_states - # Extract just the Python dict from each state message - all_state_dicts = [st[0].stream.stream_state.__dict__ for st in intermediate_states] - - for idx, itermidiate_state in enumerate(all_state_dicts): - assert itermidiate_state == intermidiate_states[idx], idx - # For each intermediate state, perform another read starting from that state for state, records_before_state in intermediate_states[:-1]: output_intermediate = _run_read(manifest, CONFIG, STREAM_NAME, [state]) @@ -874,313 +874,8 @@ def run_incremental_parent_state_test( ), f"Final state mismatch at run {i + 1}. Expected {expected_states}, got {final_state}" -INITIAL_STATE = { - "parent_state": { - "post_comments": { - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, - } - ], - "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, - } - }, - "state": {"created_at": INITIAL_GLOBAL_CURSOR}, - "states": [ - { - "partition": { - "id": 10, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, - }, - { - "partition": { - "id": 11, - "parent_slice": {"id": 1, "parent_slice": {}}, - }, - "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, - }, - ], - "lookback_window": 86400, -} - -INTERMEDIATE_STATES = [ - { - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-15T00:00:00Z"}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-03T00:00:02Z"}, - }, - ], - "state": {"created_at": "2024-01-03T00:00:02Z"}, - "lookback_window": 86400, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {}, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": "2023-01-04T00:00:00Z"}, - } - ], - "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, - } - }, - }, - { - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-15T00:00:00Z"}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-13T00:00:00Z"}, - }, - ], - "state": {"created_at": "2024-01-03T00:00:02Z"}, - "lookback_window": 86400, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {}, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": "2023-01-04T00:00:00Z"}, - } - ], - "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, - } - }, - }, - { - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-15T00:00:00Z"}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-13T00:00:00Z"}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-02T00:00:02Z"}, - }, - ], - "state": {"created_at": "2024-01-03T00:00:02Z"}, - "lookback_window": 86400, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {}, - "states": [ 
- { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": "2023-01-04T00:00:00Z"}, - } - ], - "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, - } - }, - }, - { - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-15T00:00:00Z"}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-13T00:00:00Z"}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-02T00:00:02Z"}, - }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-12T00:00:00Z"}, - }, - ], - "state": {"created_at": "2024-01-03T00:00:02Z"}, - "lookback_window": 86400, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {}, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": "2024-01-25T00:00:00Z"}, - } - ], - "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, - } - }, - }, - { - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-15T00:00:00Z"}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-13T00:00:00Z"}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-02T00:00:02Z"}, - }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-12T00:00:00Z"}, - }, - { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-12T00:00:15Z"}, - }, - { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-02T00:00:02Z"}, - }, - ], - "state": {"created_at": "2024-01-03T00:00:02Z"}, - "lookback_window": 86400, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {}, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": "2024-01-25T00:00:00Z"}, - } - ], - "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, - } - }, - }, - { - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-15T00:00:00Z"}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-13T00:00:00Z"}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-02T00:00:02Z"}, - }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-12T00:00:00Z"}, - }, - { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-12T00:00:15Z"}, - }, - { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-10T00:00:00Z"}, - }, - ], - "state": {"created_at": "2024-01-03T00:00:02Z"}, - "lookback_window": 86400, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {}, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": "2024-01-25T00:00:00Z"}, - }, - { - 
"partition": {"id": 2, "parent_slice": {}}, - "cursor": {"updated_at": "2024-01-22T00:00:00Z"}, - }, - ], - "parent_state": {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, - } - }, - }, - { - "use_global_cursor": False, - "states": [ - { - "partition": {"id": 10, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-15T00:00:00Z"}, - }, - { - "partition": {"id": 11, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-13T00:00:00Z"}, - }, - { - "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-02T00:00:02Z"}, - }, - { - "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-12T00:00:00Z"}, - }, - { - "partition": {"id": 21, "parent_slice": {"id": 2, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-12T00:00:15Z"}, - }, - { - "partition": {"id": 30, "parent_slice": {"id": 3, "parent_slice": {}}}, - "cursor": {"created_at": "2024-01-10T00:00:00Z"}, - }, - ], - "state": {"created_at": "2024-01-15T00:00:00Z"}, - "lookback_window": 1, - "parent_state": { - "post_comments": { - "use_global_cursor": False, - "state": {"updated_at": "2024-01-25T00:00:00Z"}, - "lookback_window": 1, - "states": [ - { - "partition": {"id": 1, "parent_slice": {}}, - "cursor": {"updated_at": "2024-01-25T00:00:00Z"}, - }, - { - "partition": {"id": 2, "parent_slice": {}}, - "cursor": {"updated_at": "2024-01-22T00:00:00Z"}, - }, - { - "partition": {"id": 3, "parent_slice": {}}, - "cursor": {"updated_at": "2024-01-09T00:00:00Z"}, - }, - ], - "parent_state": {"posts": {"updated_at": "2024-01-30T00:00:00Z"}}, - } - }, - }, -] - - @pytest.mark.parametrize( - "test_name, manifest, mock_requests, expected_records, num_intermediate_states, intermidiate_states, initial_state, expected_state", + "test_name, manifest, mock_requests, expected_records, num_intermediate_states, initial_state, expected_state", [ ( "test_incremental_parent_state", @@ -1396,10 +1091,38 @@ def run_incremental_parent_state_test( ], # Number of intermediate states - 6 as number of parent partitions 6, - # Intermediate states - INTERMEDIATE_STATES, # Initial state - INITIAL_STATE, + { + "parent_state": { + "post_comments": { + "states": [ + { + "partition": {"id": 1, "parent_slice": {}}, + "cursor": {"updated_at": PARENT_COMMENT_CURSOR_PARTITION_1}, + } + ], + "parent_state": {"posts": {"updated_at": PARENT_POSTS_CURSOR}}, + } + }, + "state": {"created_at": INITIAL_GLOBAL_CURSOR}, + "states": [ + { + "partition": { + "id": 10, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_10_CURSOR}, + }, + { + "partition": { + "id": 11, + "parent_slice": {"id": 1, "parent_slice": {}}, + }, + "cursor": {"created_at": INITIAL_STATE_PARTITION_11_CURSOR}, + }, + ], + "lookback_window": 86400, + }, # Expected state { "state": {"created_at": VOTE_100_CREATED_AT}, @@ -1465,7 +1188,6 @@ def test_incremental_parent_state( mock_requests, expected_records, num_intermediate_states, - intermidiate_states, initial_state, expected_state, ): @@ -1478,7 +1200,6 @@ def test_incremental_parent_state( mock_requests, expected_records, num_intermediate_states, - intermidiate_states, initial_state, [expected_state], ) @@ -3306,3 +3027,260 @@ def test_state_throttling(mocker): cursor._emit_state_message() mock_connector_manager.update_state_for_stream.assert_called_once() mock_repo.emit_message.assert_called_once() + + +def 
test_given_no_partitions_processed_when_close_partition_then_no_state_update(): + mock_cursor = MagicMock() + # No slices for no partitions + mock_cursor.stream_slices.side_effect = [iter([])] + mock_cursor.state = {} # Empty state for no partitions + + cursor_factory_mock = MagicMock() + cursor_factory_mock.create.return_value = mock_cursor + + connector_state_converter = CustomFormatConcurrentStreamStateConverter( + datetime_format="%Y-%m-%dT%H:%M:%SZ", + input_datetime_formats=["%Y-%m-%dT%H:%M:%SZ"], + is_sequential_state=True, + cursor_granularity=timedelta(0), + ) + + cursor = ConcurrentPerPartitionCursor( + cursor_factory=cursor_factory_mock, + partition_router=MagicMock(), + stream_name="test_stream", + stream_namespace=None, + stream_state={}, + message_repository=MagicMock(), + connector_state_manager=MagicMock(), + connector_state_converter=connector_state_converter, + cursor_field=CursorField(cursor_field_key="updated_at"), + ) + partition_router = cursor._partition_router + partition_router.stream_slices.return_value = iter([]) + partition_router.get_stream_state.return_value = {} + + slices = list(cursor.stream_slices()) # Call once + for slice in slices: + cursor.close_partition( + DeclarativePartition("test_stream", {}, MagicMock(), MagicMock(), slice) + ) + + assert cursor.state == { + "use_global_cursor": False, + "lookback_window": 0, + "states": [], + } + assert len(cursor._cursor_per_partition) == 0 + assert len(cursor._semaphore_per_partition) == 0 + assert len(cursor._partition_parent_state_map) == 0 + assert mock_cursor.stream_slices.call_count == 0 # No calls since no partitions + + +def test_given_new_partition_mid_sync_when_close_partition_then_update_state(): + mock_cursor = MagicMock() + # Simulate one slice per cursor + mock_cursor.stream_slices.side_effect = [ + iter( + [ + {"slice1": "data1"}, + {"slice2": "data1"}, # First slice + ] + ), + iter( + [ + {"slice2": "data2"}, + {"slice2": "data2"}, # First slice for new partition + ] + ), + ] + mock_cursor.state = {"updated_at": "2024-01-03T00:00:00Z"} # Set cursor state + + connector_state_converter = CustomFormatConcurrentStreamStateConverter( + datetime_format="%Y-%m-%dT%H:%M:%SZ", + input_datetime_formats=["%Y-%m-%dT%H:%M:%SZ"], + is_sequential_state=True, + cursor_granularity=timedelta(0), + ) + + cursor_factory_mock = MagicMock() + cursor_factory_mock.create.return_value = mock_cursor + + cursor = ConcurrentPerPartitionCursor( + cursor_factory=cursor_factory_mock, + partition_router=MagicMock(), + stream_name="test_stream", + stream_namespace=None, + stream_state={ + "states": [ + {"partition": {"id": "1"}, "cursor": {"updated_at": "2024-01-01T00:00:00Z"}} + ], + "state": {"updated_at": "2024-01-01T00:00:00Z"}, + "lookback_window": 86400, + "parent_state": {"posts": {"updated_at": "2024-01-01T00:00:00Z"}}, + }, + message_repository=MagicMock(), + connector_state_manager=MagicMock(), + connector_state_converter=connector_state_converter, + cursor_field=CursorField(cursor_field_key="updated_at"), + ) + partition_router = cursor._partition_router + all_partitions = [ + StreamSlice(partition={"id": "1"}, cursor_slice={}, extra_fields={}), + StreamSlice(partition={"id": "2"}, cursor_slice={}, extra_fields={}), # New partition + ] + partition_router.stream_slices.return_value = iter(all_partitions) + partition_router.get_stream_state.side_effect = [ + {"posts": {"updated_at": "2024-01-04T00:00:00Z"}}, # Initial parent state + {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, # Updated parent state for new 
partition + ] + + slices = list(cursor.stream_slices()) + # Close all partitions except from the first one + for slice in slices: + cursor.close_partition( + DeclarativePartition("test_stream", {}, MagicMock(), MagicMock(), slice) + ) + + state = cursor.state + assert state["use_global_cursor"] is False + assert len(state["states"]) == 2 # Should now have two partitions + assert any(p["partition"]["id"] == "1" for p in state["states"]) + assert any(p["partition"]["id"] == "2" for p in state["states"]) + assert state["parent_state"] == {"posts": {"updated_at": "2024-01-05T00:00:00Z"}} + assert state["lookback_window"] == 86400 + assert mock_cursor.stream_slices.call_count == 2 # Called once for each partition + + +def test_given_all_partitions_finished_when_close_partition_then_final_state_emitted(): + mock_cursor = MagicMock() + # Simulate one slice per cursor + mock_cursor.stream_slices.side_effect = [ + iter( + [ + {"slice1": "data"}, # First slice for partition 1 + ] + ), + iter( + [ + {"slice2": "data"}, # First slice for partition 2 + ] + ), + ] + mock_cursor.state = {"updated_at": "2024-01-02T00:00:00Z"} # Set cursor state (latest) + + cursor_factory_mock = MagicMock() + cursor_factory_mock.create.return_value = mock_cursor + + connector_state_converter = CustomFormatConcurrentStreamStateConverter( + datetime_format="%Y-%m-%dT%H:%M:%SZ", + input_datetime_formats=["%Y-%m-%dT%H:%M:%SZ"], + is_sequential_state=True, + cursor_granularity=timedelta(0), + ) + + cursor = ConcurrentPerPartitionCursor( + cursor_factory=cursor_factory_mock, + partition_router=MagicMock(), + stream_name="test_stream", + stream_namespace=None, + stream_state={ + "states": [ + {"partition": {"id": "1"}, "cursor": {"updated_at": "2024-01-01T00:00:00Z"}}, + {"partition": {"id": "2"}, "cursor": {"updated_at": "2024-01-02T00:00:00Z"}}, + ], + "state": {"updated_at": "2024-01-02T00:00:00Z"}, + "lookback_window": 86400, + "parent_state": {"posts": {"updated_at": "2024-01-03T00:00:00Z"}}, + }, + message_repository=MagicMock(), + connector_state_manager=MagicMock(), + connector_state_converter=connector_state_converter, + cursor_field=CursorField(cursor_field_key="updated_at"), + ) + partition_router = cursor._partition_router + partitions = [ + StreamSlice(partition={"id": "1"}, cursor_slice={}, extra_fields={}), + StreamSlice(partition={"id": "2"}, cursor_slice={}, extra_fields={}), + ] + partition_router.stream_slices.return_value = iter(partitions) + partition_router.get_stream_state.return_value = { + "posts": {"updated_at": "2024-01-06T00:00:00Z"} + } + + slices = list(cursor.stream_slices()) + for slice in slices: + cursor.close_partition( + DeclarativePartition("test_stream", {}, MagicMock(), MagicMock(), slice) + ) + + cursor.ensure_at_least_one_state_emitted() + + final_state = cursor.state + assert final_state["use_global_cursor"] is False + assert len(final_state["states"]) == 2 + assert final_state["state"]["updated_at"] == "2024-01-02T00:00:00Z" + assert final_state["parent_state"] == {"posts": {"updated_at": "2024-01-06T00:00:00Z"}} + assert final_state["lookback_window"] == 1 + assert cursor._message_repository.emit_message.call_count == 2 + assert mock_cursor.stream_slices.call_count == 2 # Called once for each partition + + +def test_given_partition_limit_exceeded_when_close_partition_then_switch_to_global_cursor(): + mock_cursor = MagicMock() + # Simulate one slice per cursor + mock_cursor.stream_slices.side_effect = [iter([{"slice" + str(i): "data"}]) for i in range(3)] + mock_cursor.state = 
{"updated_at": "2024-01-01T00:00:00Z"} # Set cursor state + + cursor_factory_mock = MagicMock() + cursor_factory_mock.create.return_value = mock_cursor + + connector_state_converter = CustomFormatConcurrentStreamStateConverter( + datetime_format="%Y-%m-%dT%H:%M:%SZ", + input_datetime_formats=["%Y-%m-%dT%H:%M:%SZ"], + is_sequential_state=True, + cursor_granularity=timedelta(0), + ) + + cursor = ConcurrentPerPartitionCursor( + cursor_factory=cursor_factory_mock, + partition_router=MagicMock(), + stream_name="test_stream", + stream_namespace=None, + stream_state={}, + message_repository=MagicMock(), + connector_state_manager=MagicMock(), + connector_state_converter=connector_state_converter, + cursor_field=CursorField(cursor_field_key="updated_at"), + ) + # Override default limit for testing + cursor.DEFAULT_MAX_PARTITIONS_NUMBER = 2 + cursor.SWITCH_TO_GLOBAL_LIMIT = 1 + + partition_router = cursor._partition_router + partitions = [ + StreamSlice(partition={"id": str(i)}, cursor_slice={}, extra_fields={}) for i in range(3) + ] # 3 partitions + partition_router.stream_slices.return_value = iter(partitions) + partition_router.get_stream_state.side_effect = [ + {"updated_at": "2024-01-02T00:00:00Z"}, + {"updated_at": "2024-01-03T00:00:00Z"}, + {"updated_at": "2024-01-04T00:00:00Z"}, + {"updated_at": "2024-01-04T00:00:00Z"}, + ] + + slices = list(cursor.stream_slices()) + for slice in slices: + cursor.close_partition( + DeclarativePartition("test_stream", {}, MagicMock(), MagicMock(), slice) + ) + cursor.ensure_at_least_one_state_emitted() + + final_state = cursor.state + assert len(slices) == 3 + assert final_state["use_global_cursor"] is True + assert len(final_state.get("states", [])) == 0 # No per-partition states + assert final_state["parent_state"] == {"updated_at": "2024-01-04T00:00:00Z"} + assert "lookback_window" in final_state + assert len(cursor._cursor_per_partition) <= cursor.DEFAULT_MAX_PARTITIONS_NUMBER + assert mock_cursor.stream_slices.call_count == 3 # Called once for each partition From c51f8406460cf618fcfad2cda4cb87178b2ea247 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Fri, 21 Feb 2025 15:03:07 +0200 Subject: [PATCH 24/26] Update unit tests --- .../test_concurrent_perpartitioncursor.py | 147 ++++++++++++++---- 1 file changed, 120 insertions(+), 27 deletions(-) diff --git a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py index 084c31142..bbed04a81 100644 --- a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py +++ b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py @@ -3077,24 +3077,29 @@ def test_given_no_partitions_processed_when_close_partition_then_no_state_update assert mock_cursor.stream_slices.call_count == 0 # No calls since no partitions -def test_given_new_partition_mid_sync_when_close_partition_then_update_state(): - mock_cursor = MagicMock() - # Simulate one slice per cursor - mock_cursor.stream_slices.side_effect = [ - iter( - [ - {"slice1": "data1"}, - {"slice2": "data1"}, # First slice - ] - ), - iter( - [ - {"slice2": "data2"}, - {"slice2": "data2"}, # First slice for new partition - ] - ), - ] - mock_cursor.state = {"updated_at": "2024-01-03T00:00:00Z"} # Set cursor state +def test_given_unfinished_first_parent_partition_no_parent_state_update(): + # Create two mock cursors with different states for each partition + mock_cursor_1 = MagicMock() + 
mock_cursor_1.stream_slices.return_value = iter( + [ + {"slice1": "data1"}, + {"slice2": "data1"}, # First partition slices + ] + ) + mock_cursor_1.state = {"updated_at": "2024-01-01T00:00:00Z"} # State for partition "1" + + mock_cursor_2 = MagicMock() + mock_cursor_2.stream_slices.return_value = iter( + [ + {"slice2": "data2"}, + {"slice2": "data2"}, # Second partition slices + ] + ) + mock_cursor_2.state = {"updated_at": "2024-01-02T00:00:00Z"} # State for partition "2" + + # Configure cursor factory to return different mock cursors based on partition + cursor_factory_mock = MagicMock() + cursor_factory_mock.create.side_effect = [mock_cursor_1, mock_cursor_2] connector_state_converter = CustomFormatConcurrentStreamStateConverter( datetime_format="%Y-%m-%dT%H:%M:%SZ", @@ -3103,8 +3108,89 @@ def test_given_new_partition_mid_sync_when_close_partition_then_update_state(): cursor_granularity=timedelta(0), ) + cursor = ConcurrentPerPartitionCursor( + cursor_factory=cursor_factory_mock, + partition_router=MagicMock(), + stream_name="test_stream", + stream_namespace=None, + stream_state={ + "states": [ + {"partition": {"id": "1"}, "cursor": {"updated_at": "2024-01-01T00:00:00Z"}} + ], + "state": {"updated_at": "2024-01-01T00:00:00Z"}, + "lookback_window": 86400, + "parent_state": {"posts": {"updated_at": "2024-01-01T00:00:00Z"}}, + }, + message_repository=MagicMock(), + connector_state_manager=MagicMock(), + connector_state_converter=connector_state_converter, + cursor_field=CursorField(cursor_field_key="updated_at"), + ) + partition_router = cursor._partition_router + all_partitions = [ + StreamSlice(partition={"id": "1"}, cursor_slice={}, extra_fields={}), + StreamSlice(partition={"id": "2"}, cursor_slice={}, extra_fields={}), # New partition + ] + partition_router.stream_slices.return_value = iter(all_partitions) + partition_router.get_stream_state.side_effect = [ + {"posts": {"updated_at": "2024-01-04T00:00:00Z"}}, # Initial parent state + {"posts": {"updated_at": "2024-01-05T00:00:00Z"}}, # Updated parent state for new partition + ] + + slices = list(cursor.stream_slices()) + # Close all partitions except from the first one + for slice in slices[1:]: + cursor.close_partition( + DeclarativePartition("test_stream", {}, MagicMock(), MagicMock(), slice) + ) + cursor.ensure_at_least_one_state_emitted() + print(cursor.state) + + state = cursor.state + assert state == { + "use_global_cursor": False, + "states": [ + {"partition": {"id": "1"}, "cursor": {"updated_at": "2024-01-01T00:00:00Z"}}, + {"partition": {"id": "2"}, "cursor": {"updated_at": "2024-01-02T00:00:00Z"}}, + ], + "state": {"updated_at": "2024-01-01T00:00:00Z"}, + "lookback_window": 86400, + "parent_state": {"posts": {"updated_at": "2024-01-01T00:00:00Z"}}, + } + assert mock_cursor_1.stream_slices.call_count == 1 # Called once for each partition + assert mock_cursor_2.stream_slices.call_count == 1 # Called once for each partition + + +def test_given_unfinished_last_parent_partition_with_partial_parent_state_update(): + # Create two mock cursors with different states for each partition + mock_cursor_1 = MagicMock() + mock_cursor_1.stream_slices.return_value = iter( + [ + {"slice1": "data1"}, + {"slice2": "data1"}, # First partition slices + ] + ) + mock_cursor_1.state = {"updated_at": "2024-01-02T00:00:00Z"} # State for partition "1" + + mock_cursor_2 = MagicMock() + mock_cursor_2.stream_slices.return_value = iter( + [ + {"slice2": "data2"}, + {"slice2": "data2"}, # Second partition slices + ] + ) + mock_cursor_2.state = {"updated_at": 
"2024-01-01T00:00:00Z"} # State for partition "2" + + # Configure cursor factory to return different mock cursors based on partition cursor_factory_mock = MagicMock() - cursor_factory_mock.create.return_value = mock_cursor + cursor_factory_mock.create.side_effect = [mock_cursor_1, mock_cursor_2] + + connector_state_converter = CustomFormatConcurrentStreamStateConverter( + datetime_format="%Y-%m-%dT%H:%M:%SZ", + input_datetime_formats=["%Y-%m-%dT%H:%M:%SZ"], + is_sequential_state=True, + cursor_granularity=timedelta(0), + ) cursor = ConcurrentPerPartitionCursor( cursor_factory=cursor_factory_mock, @@ -3137,19 +3223,26 @@ def test_given_new_partition_mid_sync_when_close_partition_then_update_state(): slices = list(cursor.stream_slices()) # Close all partitions except from the first one - for slice in slices: + for slice in slices[:-1]: cursor.close_partition( DeclarativePartition("test_stream", {}, MagicMock(), MagicMock(), slice) ) + cursor.ensure_at_least_one_state_emitted() + print(cursor.state) state = cursor.state - assert state["use_global_cursor"] is False - assert len(state["states"]) == 2 # Should now have two partitions - assert any(p["partition"]["id"] == "1" for p in state["states"]) - assert any(p["partition"]["id"] == "2" for p in state["states"]) - assert state["parent_state"] == {"posts": {"updated_at": "2024-01-05T00:00:00Z"}} - assert state["lookback_window"] == 86400 - assert mock_cursor.stream_slices.call_count == 2 # Called once for each partition + assert state == { + "use_global_cursor": False, + "states": [ + {"partition": {"id": "1"}, "cursor": {"updated_at": "2024-01-02T00:00:00Z"}}, + {"partition": {"id": "2"}, "cursor": {"updated_at": "2024-01-01T00:00:00Z"}}, + ], + "state": {"updated_at": "2024-01-01T00:00:00Z"}, + "lookback_window": 86400, + "parent_state": {"posts": {"updated_at": "2024-01-04T00:00:00Z"}}, + } + assert mock_cursor_1.stream_slices.call_count == 1 # Called once for each partition + assert mock_cursor_2.stream_slices.call_count == 1 # Called once for each partition def test_given_all_partitions_finished_when_close_partition_then_final_state_emitted(): From 4a18954a55372a0fab521065d2ecbc29d0a12104 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Fri, 21 Feb 2025 15:55:58 +0200 Subject: [PATCH 25/26] Add deleting finished semaphores --- .../concurrent_partition_cursor.py | 28 +++++-- .../test_concurrent_perpartitioncursor.py | 75 +++++++++++++++++++ 2 files changed, 96 insertions(+), 7 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index 1ece3c579..3532b4e67 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -169,6 +169,8 @@ def _check_and_update_parent_state(self) -> None: Pop the leftmost partition state from _partition_parent_state_map only if *all partitions* up to (and including) that partition key in _semaphore_per_partition are fully finished (i.e. in _finished_partitions and semaphore._value == 0). + Additionally, delete finished semaphores with a value of 0 to free up memory, + as they are only needed to track errors and completion status. 
""" last_closed_state = None @@ -178,7 +180,9 @@ def _check_and_update_parent_state(self) -> None: # Verify ALL partitions from the left up to earliest_key are finished all_left_finished = True - for p_key, sem in self._semaphore_per_partition.items(): + for p_key, sem in list( + self._semaphore_per_partition.items() + ): # Use list to allow modification during iteration # If any earlier partition is still not finished, we must stop if p_key not in self._finished_partitions or sem._value != 0: all_left_finished = False @@ -191,17 +195,26 @@ def _check_and_update_parent_state(self) -> None: if not all_left_finished: break - # Otherwise, pop the leftmost entry from parent-state map + # Pop the leftmost entry from parent-state map _, closed_parent_state = self._partition_parent_state_map.popitem(last=False) last_closed_state = closed_parent_state - # Update _parent_state if we actually popped at least one partition + # Clean up finished semaphores with value 0 up to and including earliest_key + for p_key in list(self._semaphore_per_partition.keys()): + sem = self._semaphore_per_partition[p_key] + if p_key in self._finished_partitions and sem._value == 0: + del self._semaphore_per_partition[p_key] + logger.debug(f"Deleted finished semaphore for partition {p_key} with value 0") + if p_key == earliest_key: + break + + # Update _parent_state if we popped at least one partition if last_closed_state is not None: self._parent_state = last_closed_state def ensure_at_least_one_state_emitted(self) -> None: """ - The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be + The platform expects at least one state message on successful syncs. Hence, whatever happens, we expect this method to be called. 
""" if not any( @@ -238,6 +251,7 @@ def _emit_state_message(self, throttle: bool = True) -> None: self._message_repository.emit_message(state_message) def stream_slices(self) -> Iterable[StreamSlice]: + print("stream_slices") if self._timer.is_running(): raise RuntimeError("stream_slices has been executed more than once.") @@ -313,9 +327,9 @@ def _ensure_partition_limit(self) -> None: while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1: # Try removing finished partitions first for partition_key in list(self._cursor_per_partition.keys()): - if ( - partition_key in self._finished_partitions - and self._semaphore_per_partition[partition_key]._value == 0 + if partition_key in self._finished_partitions and ( + partition_key not in self._semaphore_per_partition + or self._semaphore_per_partition[partition_key]._value == 0 ): oldest_partition = self._cursor_per_partition.pop( partition_key diff --git a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py index bbed04a81..9e15df5b2 100644 --- a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py +++ b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py @@ -3159,6 +3159,7 @@ def test_given_unfinished_first_parent_partition_no_parent_state_update(): } assert mock_cursor_1.stream_slices.call_count == 1 # Called once for each partition assert mock_cursor_2.stream_slices.call_count == 1 # Called once for each partition + assert len(cursor._semaphore_per_partition) == 2 def test_given_unfinished_last_parent_partition_with_partial_parent_state_update(): @@ -3243,6 +3244,7 @@ def test_given_unfinished_last_parent_partition_with_partial_parent_state_update } assert mock_cursor_1.stream_slices.call_count == 1 # Called once for each partition assert mock_cursor_2.stream_slices.call_count == 1 # Called once for each partition + assert len(cursor._semaphore_per_partition) == 1 def test_given_all_partitions_finished_when_close_partition_then_final_state_emitted(): @@ -3317,6 +3319,7 @@ def test_given_all_partitions_finished_when_close_partition_then_final_state_emi assert final_state["lookback_window"] == 1 assert cursor._message_repository.emit_message.call_count == 2 assert mock_cursor.stream_slices.call_count == 2 # Called once for each partition + assert len(cursor._semaphore_per_partition) == 1 def test_given_partition_limit_exceeded_when_close_partition_then_switch_to_global_cursor(): @@ -3377,3 +3380,75 @@ def test_given_partition_limit_exceeded_when_close_partition_then_switch_to_glob assert "lookback_window" in final_state assert len(cursor._cursor_per_partition) <= cursor.DEFAULT_MAX_PARTITIONS_NUMBER assert mock_cursor.stream_slices.call_count == 3 # Called once for each partition + + +def test_semaphore_cleanup(): + # Create two mock cursors with different states for each partition + mock_cursor_1 = MagicMock() + mock_cursor_1.stream_slices.return_value = iter( + [ + {"slice1": "data1"}, + {"slice2": "data1"}, # First partition slices + ] + ) + mock_cursor_1.state = {"updated_at": "2024-01-02T00:00:00Z"} # State for partition "1" + + mock_cursor_2 = MagicMock() + mock_cursor_2.stream_slices.return_value = iter( + [ + {"slice2": "data2"}, + {"slice2": "data2"}, # Second partition slices + ] + ) + mock_cursor_2.state = {"updated_at": "2024-01-03T00:00:00Z"} # State for partition "2" + + # Configure cursor factory to return different mock cursors based on 
partition + cursor_factory_mock = MagicMock() + cursor_factory_mock.create.side_effect = [mock_cursor_1, mock_cursor_2] + + cursor = ConcurrentPerPartitionCursor( + cursor_factory=cursor_factory_mock, + partition_router=MagicMock(), + stream_name="test_stream", + stream_namespace=None, + stream_state={}, + message_repository=MagicMock(), + connector_state_manager=MagicMock(), + connector_state_converter=MagicMock(), + cursor_field=CursorField(cursor_field_key="updated_at"), + ) + + # Simulate partitions with unique parent states + slices = [ + StreamSlice(partition={"id": "1"}, cursor_slice={}), + StreamSlice(partition={"id": "2"}, cursor_slice={}), + ] + cursor._partition_router.stream_slices.return_value = iter(slices) + # Simulate unique parent states for each partition + cursor._partition_router.get_stream_state.side_effect = [ + {"parent": {"state": "state1"}}, # Parent state for partition "1" + {"parent": {"state": "state2"}}, # Parent state for partition "2" + ] + + # Generate slices to populate semaphores and parent states + generated_slices = list( + cursor.stream_slices() + ) # Populate _semaphore_per_partition and _partition_parent_state_map + + # Verify initial state + assert len(cursor._semaphore_per_partition) == 2 + assert len(cursor._partition_parent_state_map) == 2 + assert cursor._partition_parent_state_map['{"id":"1"}'] == {"parent": {"state": "state1"}} + assert cursor._partition_parent_state_map['{"id":"2"}'] == {"parent": {"state": "state2"}} + + # Close partitions to acquire semaphores (value back to 0) + for s in generated_slices: + cursor.close_partition(DeclarativePartition("test_stream", {}, MagicMock(), MagicMock(), s)) + + # Check state after closing partitions + assert len(cursor._finished_partitions) == 2 + assert len(cursor._semaphore_per_partition) == 0 + assert '{"id":"1"}' not in cursor._semaphore_per_partition + assert '{"id":"2"}' not in cursor._semaphore_per_partition + assert len(cursor._partition_parent_state_map) == 0 # All parent states should be popped + assert cursor._parent_state == {"parent": {"state": "state2"}} # Last parent state From a7ece97d56ac5911ebd48b2fb582dd8de3e253c0 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Fri, 21 Feb 2025 17:53:44 +0200 Subject: [PATCH 26/26] Delete testing prints --- .../declarative/incremental/concurrent_partition_cursor.py | 1 - .../incremental/test_concurrent_perpartitioncursor.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index 3532b4e67..715589026 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -251,7 +251,6 @@ def _emit_state_message(self, throttle: bool = True) -> None: self._message_repository.emit_message(state_message) def stream_slices(self) -> Iterable[StreamSlice]: - print("stream_slices") if self._timer.is_running(): raise RuntimeError("stream_slices has been executed more than once.") diff --git a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py index 9e15df5b2..3b4b4fe24 100644 --- a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py +++ b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py @@ -3144,7 +3144,6 @@ def 
test_given_unfinished_first_parent_partition_no_parent_state_update(): DeclarativePartition("test_stream", {}, MagicMock(), MagicMock(), slice) ) cursor.ensure_at_least_one_state_emitted() - print(cursor.state) state = cursor.state assert state == { @@ -3229,7 +3228,6 @@ def test_given_unfinished_last_parent_partition_with_partial_parent_state_update DeclarativePartition("test_stream", {}, MagicMock(), MagicMock(), slice) ) cursor.ensure_at_least_one_state_emitted() - print(cursor.state) state = cursor.state assert state == {