From f5067b7cc5e58cf38199c49f2d07581fcd19f800 Mon Sep 17 00:00:00 2001 From: Shadaj Laddad Date: Wed, 22 Jan 2025 17:09:51 -0700 Subject: [PATCH] docs(hydro_lang): initial website docs on core Hydro concepts --- Cargo.lock | 1 + docs/docs/hydro/correctness.md | 16 +++++++ .../hydro/live-collections/_category_.json | 8 ++++ .../live-collections/bounded-unbounded.md | 48 +++++++++++++++++++ .../determinism.md} | 17 +++---- docs/docs/hydro/live-collections/index.md | 4 ++ .../live-collections/singletons-optionals.md | 10 ++++ docs/docs/hydro/live-collections/streams.md | 10 ++++ docs/docs/hydro/locations/_category_.json | 8 ++++ docs/docs/hydro/locations/clusters.md | 10 ++++ docs/docs/hydro/locations/external-clients.md | 10 ++++ docs/docs/hydro/locations/index.md | 6 +++ docs/docs/hydro/locations/processes.md | 10 ++++ docs/docs/hydro/stageleft.mdx | 2 +- .../hydro/ticks-timestamps/_category_.json | 8 ++++ docs/docs/hydro/ticks-timestamps/atomicity.md | 10 ++++ .../ticks-timestamps/batching-and-emitting.md | 10 ++++ .../hydro/ticks-timestamps/execution-model.md | 10 ++++ docs/docs/hydro/ticks-timestamps/index.md | 6 +++ .../hydro/ticks-timestamps/stateful-loops.md | 10 ++++ hydro_lang/src/stream.rs | 10 ++++ hydro_test/Cargo.toml | 1 + 22 files changed, 213 insertions(+), 12 deletions(-) create mode 100644 docs/docs/hydro/correctness.md create mode 100644 docs/docs/hydro/live-collections/_category_.json create mode 100644 docs/docs/hydro/live-collections/bounded-unbounded.md rename docs/docs/hydro/{consistency.md => live-collections/determinism.md} (68%) create mode 100644 docs/docs/hydro/live-collections/index.md create mode 100644 docs/docs/hydro/live-collections/singletons-optionals.md create mode 100644 docs/docs/hydro/live-collections/streams.md create mode 100644 docs/docs/hydro/locations/_category_.json create mode 100644 docs/docs/hydro/locations/clusters.md create mode 100644 docs/docs/hydro/locations/external-clients.md create mode 100644 docs/docs/hydro/locations/index.md create mode 100644 docs/docs/hydro/locations/processes.md create mode 100644 docs/docs/hydro/ticks-timestamps/_category_.json create mode 100644 docs/docs/hydro/ticks-timestamps/atomicity.md create mode 100644 docs/docs/hydro/ticks-timestamps/batching-and-emitting.md create mode 100644 docs/docs/hydro/ticks-timestamps/execution-model.md create mode 100644 docs/docs/hydro/ticks-timestamps/index.md create mode 100644 docs/docs/hydro/ticks-timestamps/stateful-loops.md diff --git a/Cargo.lock b/Cargo.lock index bd721e82b1e..88943c1c77d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1648,6 +1648,7 @@ dependencies = [ "stageleft", "stageleft_tool", "tokio", + "tokio-test", ] [[package]] diff --git a/docs/docs/hydro/correctness.md b/docs/docs/hydro/correctness.md new file mode 100644 index 00000000000..f43ff37dab3 --- /dev/null +++ b/docs/docs/hydro/correctness.md @@ -0,0 +1,16 @@ +--- +sidebar_position: 3 +--- + +# Safety and Correctness +Just like Rust's type system helps you avoid memory safety bugs, Hydro helps you ensure **distributed safety**. Hydro's type systems helps you avoid many kinds of distributed systems bugs, including: +- Non-determinism due to message delays (which reorder arrival) or retries (which result in duplicates) + - See [Live Collections / Eventual Determinism](./live-collections/determinism.md) +- Using mismatched serialization and deserialization formats across services + - See [Locations and Networking](./locations/index.md) +- Misusing node identifiers across logically independent clusters of machines + - See [Locations / Clusters](./locations/clusters.md) +- Relying on non-determinstic clocks for batching events + - See [Ticks and Timestamps / Batching and Emitting Streams](./ticks-timestamps/batching-and-emitting.md) + +These safety guarantees are surfaced through the Rust type system, so you can catch these bugs at compile time rather than in production. And when it is necessary to bypass these checks for advanced distributed logic, you can use the same `unsafe` keyword as in Rust as an escape hatch. diff --git a/docs/docs/hydro/live-collections/_category_.json b/docs/docs/hydro/live-collections/_category_.json new file mode 100644 index 00000000000..e392c1ef73e --- /dev/null +++ b/docs/docs/hydro/live-collections/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Live Collections", + "position": 4, + "link": { + "type": "doc", + "id": "hydro/live-collections/index" + } +} diff --git a/docs/docs/hydro/live-collections/bounded-unbounded.md b/docs/docs/hydro/live-collections/bounded-unbounded.md new file mode 100644 index 00000000000..a60180fc3d2 --- /dev/null +++ b/docs/docs/hydro/live-collections/bounded-unbounded.md @@ -0,0 +1,48 @@ +--- +sidebar_position: 0 +--- + +# Bounded and Unbounded Types +Although live collections can be continually updated, some collection types also support **termination**, after which no additional changes can be made. For example, a live collection created by reading integers from an in-memory `Vec` will become terminated once all the elements of the `Vec` have been loaded. But other live collections, such as one being updated by the network, may never become terminated. + +In Hydro, certain APIs are restricted to only work on collections that are **guaranteed to terminate** (**bounded** collections). All live collections in Hydro have a type parameter (typically named `B`), which tracks whether the collection is bounded (has the type `Bounded`) or unbounded (has the type `Unbounded`). These types are used in the signature of many Hydro APIs to ensure that the API is only called on the appropriate type of collection. + +## Converting Boundedness +In some cases, you may need to convert between bounded and unbounded collections. Converting from a bounded collection **to an unbounded collection** is always allowed and safe, since it relaxes the guarantees on the collection. This can be done by calling `.into()` on the collection. + +```rust,no_run +# use hydro_lang::*; +# use dfir_rs::futures::StreamExt; +# let flow = FlowBuilder::new(); +# let process = flow.process::<()>(); +# let tick = process.tick(); +# let numbers = process.source_iter(q!(vec![1, 2, 3, 4])); +let input: Stream<_, _, Bounded> = // ... +# unsafe { numbers.timestamped(&tick).tick_batch() }; +let unbounded: Stream<_, _, Unbounded> = input.into(); +``` + +```rust,no_run +# use hydro_lang::*; +# use dfir_rs::futures::StreamExt; +# let flow = FlowBuilder::new(); +# let process = flow.process::<()>(); +# let tick = process.tick(); +let input: Singleton<_, _, Bounded> = tick.singleton(q!(0)); +let unbounded: Singleton<_, _, Unbounded> = input.into(); +``` + +Converting from an unbounded collection **to a bounded collection**, however is more complex. This requires cutting off the unbounded collection at a specific point in time, which may not be possible to do deterministically. For example, the most common way to convert an unbounded `Stream` to a bounded one is to batch its elements non-deterministically using `.tick_batch()`. + +```rust,no_run +# use hydro_lang::*; +# use dfir_rs::futures::StreamExt; +# let flow = FlowBuilder::new(); +# let process = flow.process::<()>(); +let unbounded_input = // ... +# process.source_iter(q!(vec![1, 2, 3, 4])); +let tick = process.tick(); +let batch: Stream<_, _, Bounded> = unsafe { + unbounded_input.timestamped(&tick).tick_batch() +}; +``` diff --git a/docs/docs/hydro/consistency.md b/docs/docs/hydro/live-collections/determinism.md similarity index 68% rename from docs/docs/hydro/consistency.md rename to docs/docs/hydro/live-collections/determinism.md index e22702cad46..f632f777214 100644 --- a/docs/docs/hydro/consistency.md +++ b/docs/docs/hydro/live-collections/determinism.md @@ -1,9 +1,9 @@ --- -sidebar_position: 3 +sidebar_position: 1 --- -# Consistency and Safety -A key feature of Hydro is its integration with the Rust type system to highlight possible sources of inconsistent distributed behavior due to sources of non-determinism such as batching, timeouts, and message reordering. In this section, we'll walk through the consistency guarantees in Hydro and how to use the **`unsafe`** keyword as an escape hatch when introducing sources of non-determinism. +# Eventual Determinism +Most programs are strong guarantees on **determinism**, the property that when provided the same inputs, the outputs of the program are always the same. Even when the inputs and outputs are live collections, we can focus on the _eventual_ state of the collection (as if we froze the input and waited until the output stops changing). :::info @@ -11,12 +11,7 @@ Our consistency and safety model is based on the POPL'25 paper [Flo: A Semantic ::: -## Eventual Determinism -Hydro provides strong guarantees on **determinism**, the property that when provided the same inputs, the outputs of the program are always the same. Even when the inputs and outputs are streaming, we can use this property by looking at the **aggregate collection** (i.e. the result of collecting the elements of the stream into a finite collection). This makes it easy to build composable blocks of code without having to worry about runtime behavior such as batching or network delays. - -Because Hydro programs can involve network delay, we guarantee **eventual determinism**: given a set of streaming inputs which have arrived, the outputs of the program (which continuously change as inputs arrive) will **eventually** have the same _aggregate_ value. - -Again, by focusing on the _aggregate_ value rather than individual outputs, Hydro programs can involve concepts such as retractions (for incremental computation) while still guaranteeing determinism because the _resolved_ output (after processing retractions) will eventually be the same. +Hydro thus guarantees **eventual determinism**: given a set of streaming inputs which have arrived, the outputs of the program will **eventually** have the same _final_ value. This makes it easy to build composable blocks of code without having to worry about runtime behavior such as batching or network delays. :::note @@ -54,9 +49,9 @@ use std::fmt::Debug; use std::time::Duration; /// ... -/// +/// /// # Safety -/// This function will non-deterministically print elements +/// This function will non-deterministically print elements /// from the stream according to a timer. unsafe fn print_samples( stream: Stream, Unbounded> diff --git a/docs/docs/hydro/live-collections/index.md b/docs/docs/hydro/live-collections/index.md new file mode 100644 index 00000000000..cc57a73fcbe --- /dev/null +++ b/docs/docs/hydro/live-collections/index.md @@ -0,0 +1,4 @@ +# Live Collections +Traditional programs (like those in Rust) typically manipulate **collections** of data elements, such as those stored in a `Vec` or `HashMap`. These collections are **fixed** in the sense that any transformations applied to them such as `map` are immediately executed on a snapshot of the collection. This means that the output will not be updated when the input collection is modified. + +In Hydro, programs instead work with **live collections** which are expected to dynamically change over time as new elements are added or removed (in response to API requests, streaming ingestion, etc). Applying a transformation like `map` to a live collection results in another live collection that will dynamically change over time. diff --git a/docs/docs/hydro/live-collections/singletons-optionals.md b/docs/docs/hydro/live-collections/singletons-optionals.md new file mode 100644 index 00000000000..1679e6f536f --- /dev/null +++ b/docs/docs/hydro/live-collections/singletons-optionals.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 3 +--- + +# Singletons and Optionals +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/live-collections/streams.md b/docs/docs/hydro/live-collections/streams.md new file mode 100644 index 00000000000..599999e0d88 --- /dev/null +++ b/docs/docs/hydro/live-collections/streams.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 2 +--- + +# Streams +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/locations/_category_.json b/docs/docs/hydro/locations/_category_.json new file mode 100644 index 00000000000..fc22d7a7c83 --- /dev/null +++ b/docs/docs/hydro/locations/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Locations and Networking", + "position": 5, + "link": { + "type": "doc", + "id": "hydro/locations/index" + } +} diff --git a/docs/docs/hydro/locations/clusters.md b/docs/docs/hydro/locations/clusters.md new file mode 100644 index 00000000000..c97fe8bb071 --- /dev/null +++ b/docs/docs/hydro/locations/clusters.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 1 +--- + +# Clusters +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/locations/external-clients.md b/docs/docs/hydro/locations/external-clients.md new file mode 100644 index 00000000000..d53ea41f0b9 --- /dev/null +++ b/docs/docs/hydro/locations/external-clients.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 2 +--- + +# External Clients +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/locations/index.md b/docs/docs/hydro/locations/index.md new file mode 100644 index 00000000000..25e609d2e76 --- /dev/null +++ b/docs/docs/hydro/locations/index.md @@ -0,0 +1,6 @@ +# Locations and Networking +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/locations/processes.md b/docs/docs/hydro/locations/processes.md new file mode 100644 index 00000000000..44359005905 --- /dev/null +++ b/docs/docs/hydro/locations/processes.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 0 +--- + +# Processes +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/stageleft.mdx b/docs/docs/hydro/stageleft.mdx index 54c119394d5..a4d50c394b2 100644 --- a/docs/docs/hydro/stageleft.mdx +++ b/docs/docs/hydro/stageleft.mdx @@ -1,6 +1,6 @@ --- title: Stageleft -sidebar_position: 4 +sidebar_position: 7 --- import StageleftDocs from '../../../stageleft/README.md' diff --git a/docs/docs/hydro/ticks-timestamps/_category_.json b/docs/docs/hydro/ticks-timestamps/_category_.json new file mode 100644 index 00000000000..12eb82d40ec --- /dev/null +++ b/docs/docs/hydro/ticks-timestamps/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Ticks and Timestamps", + "position": 6, + "link": { + "type": "doc", + "id": "hydro/ticks-timestamps/index" + } +} diff --git a/docs/docs/hydro/ticks-timestamps/atomicity.md b/docs/docs/hydro/ticks-timestamps/atomicity.md new file mode 100644 index 00000000000..de3be3c3a97 --- /dev/null +++ b/docs/docs/hydro/ticks-timestamps/atomicity.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 3 +--- + +# Atomicity with Timestamps +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/ticks-timestamps/batching-and-emitting.md b/docs/docs/hydro/ticks-timestamps/batching-and-emitting.md new file mode 100644 index 00000000000..a9549c51161 --- /dev/null +++ b/docs/docs/hydro/ticks-timestamps/batching-and-emitting.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 1 +--- + +# Batching and Emitting Streams +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/ticks-timestamps/execution-model.md b/docs/docs/hydro/ticks-timestamps/execution-model.md new file mode 100644 index 00000000000..db2dc3d05c9 --- /dev/null +++ b/docs/docs/hydro/ticks-timestamps/execution-model.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 0 +--- + +# The Tick Execution Model +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/ticks-timestamps/index.md b/docs/docs/hydro/ticks-timestamps/index.md new file mode 100644 index 00000000000..e0d5f47a476 --- /dev/null +++ b/docs/docs/hydro/ticks-timestamps/index.md @@ -0,0 +1,6 @@ +# Ticks and Timestamps +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/docs/docs/hydro/ticks-timestamps/stateful-loops.md b/docs/docs/hydro/ticks-timestamps/stateful-loops.md new file mode 100644 index 00000000000..7f3c56673e6 --- /dev/null +++ b/docs/docs/hydro/ticks-timestamps/stateful-loops.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 2 +--- + +# Stateful Loops +:::caution + +The Hydro documentation is currently under active development! This page is a placeholder for future content. + +::: diff --git a/hydro_lang/src/stream.rs b/hydro_lang/src/stream.rs index deffeb4fb35..db14cfe40c5 100644 --- a/hydro_lang/src/stream.rs +++ b/hydro_lang/src/stream.rs @@ -73,6 +73,16 @@ pub struct Stream { _phantom: PhantomData<(T, L, B, Order)>, } +impl<'a, T, L: Location<'a>, O> From> for Stream { + fn from(stream: Stream) -> Stream { + Stream { + location: stream.location, + ir_node: stream.ir_node, + _phantom: PhantomData, + } + } +} + impl<'a, T, L: Location<'a>, B> From> for Stream { fn from(stream: Stream) -> Stream { Stream { diff --git a/hydro_test/Cargo.toml b/hydro_test/Cargo.toml index 8e43bdec962..75e82d67d86 100644 --- a/hydro_test/Cargo.toml +++ b/hydro_test/Cargo.toml @@ -31,3 +31,4 @@ hydro_lang = { path = "../hydro_lang", version = "^0.11.0", features = [ "deploy futures = "0.3.0" async-ssh2-lite = { version = "0.5.0", features = ["vendored-openssl"] } dfir_macro = { path = "../dfir_macro", version = "^0.11.0" } +tokio-test = "0.4.4"