diff --git a/datastores/gossip_kv/kv/membership.rs b/datastores/gossip_kv/kv/membership.rs index 3657c87d3cd..1a06c50fc2e 100644 --- a/datastores/gossip_kv/kv/membership.rs +++ b/datastores/gossip_kv/kv/membership.rs @@ -7,8 +7,8 @@ pub type MemberId = String; /// Information about a member in the cluster. /// -/// A member is a transducer that is part of the cluster. Leaving or failing is a terminal -/// state for a member. When a transducer restarts and rejoins the cluster, it is considered a +/// A member is a process that is part of the cluster. Leaving or failing is a terminal +/// state for a member. When a process restarts and rejoins the cluster, it is considered a /// new member. /// /// # Generic Parameters diff --git a/datastores/gossip_kv/kv/server.rs b/datastores/gossip_kv/kv/server.rs index f3d2afc199e..d7727c463c5 100644 --- a/datastores/gossip_kv/kv/server.rs +++ b/datastores/gossip_kv/kv/server.rs @@ -105,7 +105,7 @@ where dfir_syntax! { on_start = initialize() -> tee(); - on_start -> for_each(|_| info!("{:?}: Transducer {} started.", context.current_tick(), member_id_6)); + on_start -> for_each(|_| info!("{:?}: Process {} started.", context.current_tick(), member_id_6)); seed_nodes = source_stream(seed_node_stream) -> fold::<'static>(|| Box::new(seed_nodes), |last_seed_nodes, new_seed_nodes: Vec>| { diff --git a/datastores/gossip_kv/load_test_server/server.rs b/datastores/gossip_kv/load_test_server/server.rs index df9df93699a..0b36491e898 100644 --- a/datastores/gossip_kv/load_test_server/server.rs +++ b/datastores/gossip_kv/load_test_server/server.rs @@ -27,7 +27,7 @@ const UNKNOWN_ADDRESS: LoadTestAddress = 9999999999; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Parser)] struct Opts { - /// Number of threads to run. Each thread will run an instance of the gossip-kv server transducer. + /// Number of threads to run. Each thread will run an instance of the gossip-kv server process. #[clap(short, long, default_value = "5")] thread_count: usize, diff --git a/dfir_rs/examples/kvs_bench/server.rs b/dfir_rs/examples/kvs_bench/server.rs index 7a270f8bac3..b6fcfa02f6d 100644 --- a/dfir_rs/examples/kvs_bench/server.rs +++ b/dfir_rs/examples/kvs_bench/server.rs @@ -46,12 +46,12 @@ pub fn run_server( let buffer_pool = BufferPool::::create_buffer_pool(); - let (transducer_to_peers_tx, mut transducer_to_peers_rx) = + let (process_to_peers_tx, mut process_to_peers_rx) = dfir_rs::util::unsync_channel::<(Bytes, NodeId)>(None); - let (client_to_transducer_tx, client_to_transducer_rx) = + let (client_to_process_tx, client_to_process_rx) = dfir_rs::util::unsync_channel::<(KvsRequest, NodeId)>(None); - let (transducer_to_client_tx, mut _transducer_to_client_rx) = + let (process_to_client_tx, mut _process_to_client_rx) = dfir_rs::util::unsync_channel::<(KvsResponse, NodeId)>(None); let localset = task::LocalSet::new(); @@ -71,7 +71,7 @@ pub fn run_server( collector: Rc::clone(&buffer_pool), }.deserialize(&mut deserializer).unwrap(); - client_to_transducer_tx.try_send((req, node_id)).unwrap(); + client_to_process_tx.try_send((req, node_id)).unwrap(); } } }) @@ -86,7 +86,7 @@ pub fn run_server( // TODO: Eventually this would get moved into a dfir operator that would return a Bytes struct and be efficient and zero copy and etc. async move { loop { - while let Some((serialized_req, node_id)) = transducer_to_peers_rx.next().await { + while let Some((serialized_req, node_id)) = process_to_peers_rx.next().await { let index = lookup.binary_search(&node_id).unwrap(); topology.tx[index].send(serialized_req).unwrap(); } @@ -170,7 +170,7 @@ pub fn run_server( union_puts_and_gossip_requests = union(); simulated_put_requests -> union_puts_and_gossip_requests; - source_stream(client_to_transducer_rx) + source_stream(client_to_process_rx) // -> inspect(|x| println!("{server_id}:{:5}: from peers: {x:?}", context.current_tick())) -> union_puts_and_gossip_requests; @@ -244,7 +244,7 @@ pub fn run_server( peers // -> inspect(|x| println!("{server_id}:{:5}: sending to peers: {x:?}", context.current_tick())) - -> for_each(|(node_id, serialized_req)| transducer_to_peers_tx.try_send((serialized_req, node_id)).unwrap()); + -> for_each(|(node_id, serialized_req)| process_to_peers_tx.try_send((serialized_req, node_id)).unwrap()); // join for lookups lookup = _lattice_join_fused_join::<'static, 'tick, MyLastWriteWins, MySetUnion>(); @@ -275,7 +275,7 @@ pub fn run_server( }) -> flatten() // -> inspect(|x| println!("{gossip_addr}:{:5}: Response to client: {x:?}", context.current_tick())) - -> for_each(|x| transducer_to_client_tx.try_send(x).unwrap()); + -> for_each(|x| process_to_client_tx.try_send(x).unwrap()); }; diff --git a/dfir_rs/examples/shopping/README.md b/dfir_rs/examples/shopping/README.md index cc61b1d277e..48e95264f52 100644 --- a/dfir_rs/examples/shopping/README.md +++ b/dfir_rs/examples/shopping/README.md @@ -23,10 +23,10 @@ cargo run -p hydroflow --example shopping -- --opt 5 Adding the `--graph ` flag to the end of the command lines above will print out a node-and-edge diagram of the program. Supported values for `` include [mermaid](https://mermaid-js.github.io/) and [dot](https://graphviz.org/doc/info/lang.html). -For options 1-4, the driver runs a single Hydro transducer (thread) that handles client requests. +For options 1-4, the driver runs a single Hydro process (thread) that handles client requests. -For options 5-6, the driver runs two Hydro transducers, one for each side of the network communication. +For options 5-6, the driver runs two Hydro processes, one for each side of the network communication. -For option 7, the driver runs four Hydro transducers: one client proxy and 3 server replicas. +For option 7, the driver runs four Hydro processes: one client proxy and 3 server replicas. -Under all options, the driver runs an additional independent Hydro transducer (thread) to receive the output of the flow and print it to the console. The code for this transducer is in `flows/listener_flow.rs`. +Under all options, the driver runs an additional independent Hydro process (thread) to receive the output of the flow and print it to the console. The code for this process is in `flows/listener_flow.rs`. diff --git a/dfir_rs/examples/shopping/driver.rs b/dfir_rs/examples/shopping/driver.rs index 64a9dd53326..2f3f3033b30 100644 --- a/dfir_rs/examples/shopping/driver.rs +++ b/dfir_rs/examples/shopping/driver.rs @@ -83,8 +83,8 @@ pub(crate) async fn run_driver(opts: Opts) { let client_out_addr = ipv4_resolve("localhost:23460").unwrap(); let (client_out, _, _) = dfir_rs::util::bind_udp_bytes(client_out_addr).await; - // shopping input is handled by the client proxy transducer - // so the server transducer should get an empty iterator as its first argument + // shopping input is handled by the client proxy process + // so the server process should get an empty iterator as its first argument let empty_ssiv = std::iter::empty(); // Spawn server @@ -180,8 +180,8 @@ pub(crate) async fn run_driver(opts: Opts) { let (out, _, _) = dfir_rs::util::bind_udp_bytes(out_addr).await; let gossip_addrs = gossip_addrs.clone(); - // shopping input is handled by the client proxy transducer - // so the server transducers should get an empty iterator as first argument + // shopping input is handled by the client proxy process + // so the server processes should get an empty iterator as first argument let empty_ssiv = std::iter::empty(); // Spawn server diff --git a/dfir_rs/examples/shopping/flows/client_state_flow.rs b/dfir_rs/examples/shopping/flows/client_state_flow.rs index 7c8facd7659..0dd594728c1 100644 --- a/dfir_rs/examples/shopping/flows/client_state_flow.rs +++ b/dfir_rs/examples/shopping/flows/client_state_flow.rs @@ -29,9 +29,9 @@ pub(crate) async fn client_state_flow( // Set up the Udp socket for proxy-server communication let (carts_out, carts_in, _) = dfir_rs::util::bind_udp_bytes(local_addr).await; - // This is like server_state_flow, but we split it into two transducers at a different spot. - // Here, the first transducer takes in shopping_ssiv requests and runs a stateful fold_keyed, persisting all the shopping requests in ssiv's. - // The second transducer listens on reqs_in and runs the lookup join. + // This is like server_state_flow, but we split it into two processes at a different spot. + // Here, the first process takes in shopping_ssiv requests and runs a stateful fold_keyed, persisting all the shopping requests in ssiv's. + // The second process listens on reqs_in and runs the lookup join. dfir_syntax! { source_iter(shopping_ssiv) -> fold_keyed::<'static>(SSIV_BOT, ssiv_merge) diff --git a/dfir_rs/examples/shopping/flows/orig_flow.rs b/dfir_rs/examples/shopping/flows/orig_flow.rs index db0570dea9b..4f1fc343d93 100644 --- a/dfir_rs/examples/shopping/flows/orig_flow.rs +++ b/dfir_rs/examples/shopping/flows/orig_flow.rs @@ -17,7 +17,7 @@ pub(crate) async fn orig_flow( ) -> Dfir<'static> { let client_class = client_class_iter(); - // This is the straightforward single-transducer sequential case. + // This is the straightforward single-process sequential case. // For each Request in "shopping" we look up its "client_class" (basic or prime) // via a join operator, then we group by (client, class), and for each such pair // we grow a separate vector of ClLineItems. No seal is needed in the sequential case. diff --git a/dfir_rs/examples/shopping/flows/server_state_flow.rs b/dfir_rs/examples/shopping/flows/server_state_flow.rs index 0f69fe84c4d..5941c45ed3a 100644 --- a/dfir_rs/examples/shopping/flows/server_state_flow.rs +++ b/dfir_rs/examples/shopping/flows/server_state_flow.rs @@ -29,9 +29,9 @@ pub(crate) async fn server_state_flow( // Set up the Udp socket for proxy-server communication let (reqs_out, reqs_in, _) = dfir_rs::util::bind_udp_bytes(local_addr).await; - // This is like push_group_flow, but we split it into two transducers that communicate via reqs_out and reqs_in. - // The first transducer takes in shopping_ssiv requests, and forwards them via reqs_out to the second transducer. - // The second transducer listens on reqs_in and runs the stateful logic of fold_keyed and join. + // This is like push_group_flow, but we split it into two processes that communicate via reqs_out and reqs_in. + // The first process takes in shopping_ssiv requests, and forwards them via reqs_out to the second process. + // The second process listens on reqs_in and runs the stateful logic of fold_keyed and join. dfir_syntax! { // Networked: Server-Side State source_iter(shopping_ssiv) diff --git a/dfir_rs/src/scheduled/ticks.rs b/dfir_rs/src/scheduled/ticks.rs index 205f1db40bf..9229f60d2cb 100644 --- a/dfir_rs/src/scheduled/ticks.rs +++ b/dfir_rs/src/scheduled/ticks.rs @@ -1,16 +1,16 @@ //! This module contains types to work with ticks. //! -//! Each iteration of a Hydroflow transducer loop is called a tick. Associated with the transducer -//! is a clock value, which tells you how many ticks were executed by this transducer prior to the -//! current tick. Each transducer produces totally ordered, sequentially increasing clock values, -//! which you can think of as the "local logical time" at the transducer. +//! Each iteration of a Hydroflow process loop is called a tick. Associated with the process +//! is a clock value, which tells you how many ticks were executed by this process prior to the +//! current tick. Each process produces totally ordered, sequentially increasing clock values, +//! which you can think of as the "local logical time" at the process. use std::fmt::{Display, Formatter}; use std::ops::{Add, AddAssign, Neg, Sub, SubAssign}; use serde::{Deserialize, Serialize}; -/// A point in time during execution on transducer. +/// A point in time during execution on process. /// /// `TickInstant` instances can be subtracted to calculate the `TickDuration` between them. /// @@ -66,7 +66,7 @@ pub struct TickDuration { impl TickInstant { /// Create a new TickInstant /// - /// The specified parameter indicates the number of ticks that have elapsed on the transducer, + /// The specified parameter indicates the number of ticks that have elapsed on the process, /// prior to this one. pub fn new(ticks: u64) -> Self { TickInstant(ticks) diff --git a/dfir_rs/src/util/simulation.rs b/dfir_rs/src/util/simulation.rs index fa173ad0f02..c71c11cda2b 100644 --- a/dfir_rs/src/util/simulation.rs +++ b/dfir_rs/src/util/simulation.rs @@ -1,11 +1,11 @@ //! # Hydroflow Deterministic Simulation Testing Framework //! //! This module provides a deterministic simulation testing framework for testing Hydroflow -//! transducers. +//! processes. //! -//! It can be used to test complex interactions between multiple Hydroflow transducers in a +//! It can be used to test complex interactions between multiple Hydroflow processes in a //! deterministic manner by running them in a single-threaded environment. The framework also -//! provides a "virtual network" implementation that allows production transducers to exchange +//! provides a "virtual network" implementation that allows production processes to exchange //! messages within the simulation. More importantly, the network is fully under control of the //! unit test and the test can introduce faults such as message delays, message drops and //! network partitions. @@ -13,7 +13,7 @@ //! ## Overview //! //! Conceptually, the simulation contains a "Fleet", which is a collection of "Hosts". These -//! aren't real hosts, but rather a collection of individual Hydroflow transducers (one per host) +//! aren't real hosts, but rather a collection of individual Hydroflow processes (one per host) //! that can communicate with each other over a virtual network. Every host has a "hostname" //! which uniquely identifies it within the fleet. //! @@ -47,7 +47,7 @@ //! ## Network Processing //! //! ### Outboxes & Inboxes -//! When a transducer wishes to send a message to another transducer, it sends the message to an +//! When a process wishes to send a message to another process, it sends the message to an //! "outbox" on its host. The unit test invokes the simulation's network message processing logic //! at some desired cadence to pick up all messages from all outboxes and deliver them to the //! corresponding inboxes on the destination hosts. The network message processing logic is the @@ -55,13 +55,13 @@ //! //! ### Interface Names //! Every inbox and outbox is associated with an "interface name". This is a string that uniquely -//! identifies the interface on the host. When a transducer sends a message, it specifies the +//! identifies the interface on the host. When a process sends a message, it specifies the //! destination hostname and the interface name on that host to which the message should be //! delivered. //! //! ## Progress of Time in the Simulation //! The single-threaded unit test can drive time forward on every host by invoking the `run_tick` -//! method on the host. This ultimately runs a single tick on the transducer. The unit test is +//! method on the host. This ultimately runs a single tick on the process. The unit test is //! also responsible for invoking the network message processing at the time of its choosing and //! can interleave the progress of time on various hosts and network processing as it sees fit. //! @@ -85,7 +85,7 @@ use crate::scheduled::graph::Dfir; use crate::util::{collect_ready_async, unbounded_channel}; /// A hostname is a unique identifier for a host in the simulation. It is used to address messages -/// to a specific host (and thus a specific Hydroflow transducer). +/// to a specific host (and thus a specific Hydroflow process). pub type Hostname = String; /// An interface name is a unique identifier for an inbox or an outbox on host. @@ -127,45 +127,45 @@ impl MessageSender for UnboundedSender<(T, Address)> { /// A message with an delivery address. pub type MessageWithAddress = (Box, Address); -/// An inbox is used by a host to receive messages for the transducer. +/// An inbox is used by a host to receive messages for the process. pub struct Inbox { sender: Box, } -/// Transducers can send messages to other transducers by putting those messages in an outbox +/// Processes can send messages to other processes by putting those messages in an outbox /// on their host. pub struct Outbox { receiver: Pin>>, } -/// A host is a single Hydroflow transducer running in the simulation. It has a unique hostname +/// A host is a single Hydroflow process running in the simulation. It has a unique hostname /// and can communicate with other hosts over the virtual network. It has a collection of inboxes /// and outboxes. pub struct Host { name: Hostname, - transducer: Dfir<'static>, + process: Dfir<'static>, inputs: HashMap, output: HashMap, } impl Host { - /// Run a single tick on the host's transducer. Returns true if any work was done by the - /// transducer. This effectively "advances" time on the transducer. + /// Run a single tick on the host's process. Returns true if any work was done by the + /// process. This effectively "advances" time on the process. pub fn run_tick(&mut self) -> bool { - self.transducer.run_tick() + self.process.run_tick() } } /// A builder for constructing a host in the simulation. pub struct HostBuilder { name: Hostname, - transducer: Option>, + process: Option>, inboxes: HashMap, outboxes: HashMap, } /// Used in conjunction with the `HostBuilder` to construct a host in the simulation. -pub struct TransducerBuilderContext<'context> { +pub struct ProcessBuilderContext<'context> { inboxes: &'context mut HashMap, outboxes: &'context mut HashMap, } @@ -177,9 +177,9 @@ fn sink_from_fn(mut f: impl FnMut(T)) -> impl Sink { }) } -impl TransducerBuilderContext<'_> { +impl ProcessBuilderContext<'_> { /// Create a new inbox on the host with the given interface name. Returns a stream that can - /// be read by the transducer using the source_stream dfir operator. + /// be read by the process using the source_stream dfir operator. pub fn new_inbox( &mut self, interface: InterfaceName, @@ -195,7 +195,7 @@ impl TransducerBuilderContext<'_> { } /// Creates a new outbox on the host with the given interface name. Returns a sink that can - /// be written to by the transducer using the dest_sink dfir operator. + /// be written to by the process using the dest_sink dfir operator. pub fn new_outbox( &mut self, interface: InterfaceName, @@ -220,35 +220,35 @@ impl HostBuilder { pub fn new(name: Hostname) -> Self { HostBuilder { name, - transducer: None, + process: None, inboxes: Default::default(), outboxes: Default::default(), } } - /// Supplies the (mandatory) transducer that runs on this host. - pub fn with_transducer(mut self, builder: F) -> Self + /// Supplies the (mandatory) process that runs on this host. + pub fn with_process(mut self, builder: F) -> Self where - F: FnOnce(&mut TransducerBuilderContext) -> Dfir<'static>, + F: FnOnce(&mut ProcessBuilderContext) -> Dfir<'static>, { - let mut context = TransducerBuilderContext { + let mut context = ProcessBuilderContext { inboxes: &mut self.inboxes, outboxes: &mut self.outboxes, }; - let transducer = builder(&mut context); - self.transducer = Some(transducer); + let process = builder(&mut context); + self.process = Some(process); self } /// Builds the host with the supplied configuration. pub fn build(self) -> Host { - if self.transducer.is_none() { - panic!("Transducer is required to build a host"); + if self.process.is_none() { + panic!("Process is required to build a host"); } Host { name: self.name, - transducer: self.transducer.unwrap(), + process: self.process.unwrap(), inputs: self.inboxes, output: self.outboxes, } @@ -269,13 +269,13 @@ impl Fleet { } } - /// Adds a new host to the fleet with the given name and transducer. - pub fn add_host(&mut self, name: String, transducer_builder: F) -> &Host + /// Adds a new host to the fleet with the given name and process. + pub fn add_host(&mut self, name: String, process_builder: F) -> &Host where - F: FnOnce(&mut TransducerBuilderContext) -> Dfir<'static>, + F: FnOnce(&mut ProcessBuilderContext) -> Dfir<'static>, { let host = HostBuilder::new(name.clone()) - .with_transducer(transducer_builder) + .with_process(process_builder) .build(); assert!( self.hosts.insert(host.name.clone(), host).is_none(), diff --git a/docs/docs/deploy/your-first-deploy.md b/docs/docs/deploy/your-first-deploy.md index 86f10de7808..3216ffc891d 100644 --- a/docs/docs/deploy/your-first-deploy.md +++ b/docs/docs/deploy/your-first-deploy.md @@ -5,7 +5,7 @@ sidebar_position: 3 # Your First Deploy Now that we have Hydro Deploy installed, let's deploy our first app. We'll start with a simple app that echoes packets. -First, we need to write the DFIR application, which will intergrate with Hydro Deploy to initialize connections to other services. We can create a project using the Cargo template: +First, we need to write the DFIR application, which will integrate with Hydro Deploy to initialize connections to other services. We can create a project using the Cargo template: ```bash # if you don't already have cargo-generate installed diff --git a/docs/docs/dfir/concepts/distributed_time.md b/docs/docs/dfir/concepts/distributed_time.md index e7d10efacfd..bd6420137ec 100644 --- a/docs/docs/dfir/concepts/distributed_time.md +++ b/docs/docs/dfir/concepts/distributed_time.md @@ -19,7 +19,7 @@ The way that Lamport clocks jump ahead provides a desirable property: the timest Lamport timestamps track not only the order of events on a single node, they also ensure that the timestamps on events reflect distributed ordering. Suppose that node `source` wants to send a message to node `dest`, and node source has current clock value *T_source*. The events that precede that message on node `source` have smaller timestamps. In addition, consider an event at node `dest` that follows the receipt of that message. That event must have a timestamp greater than *T_source* by Lamport's advancing rule above. Hence all the events on node `source` that preceded the sending of the message have lower timestamps than the events on node `dest` following the receipt of the message. This is Lamport's distributed "happens-before" relation, and the Lamport clock capture that relation. ## DFIR Time -As a built-in primitive, DFIR defines time only for a single transducer, as a sequence of consecutive ticks without any gaps. +As a built-in primitive, DFIR defines time only for a single process, as a sequence of consecutive ticks without any gaps. Thus the main difference between DFIR events and Lamport events are: diff --git a/docs/docs/dfir/concepts/index.md b/docs/docs/dfir/concepts/index.md index 0baa66c9aaa..2bbc4c7d179 100644 --- a/docs/docs/dfir/concepts/index.md +++ b/docs/docs/dfir/concepts/index.md @@ -25,31 +25,31 @@ DFIR is a library that can be used in any Rust program. It includes two main com DFIR's surface syntax must be embedded in a Rust program; the Rust compiler takes that DFIR syntax and compiles it into an efficient binary executable. -> We call a running DFIR binary a *transducer*. +> We call a running DFIR binary a *process*. -In typical usage, a developer writes a transducer as a single-threaded Rust program that is mostly composed of -DFIR surface syntax. Each transducer is typically responsible for a single -"node" (a machine, or a core) in a distributed system composed of many such transducers, +In typical usage, a developer writes a process as a single-threaded Rust program that is mostly composed of +DFIR surface syntax. Each process is typically responsible for a single +"node" (a machine, or a core) in a distributed system composed of many such processes, which send and receive flows of data to each other. -> DFIR itself does not generate distributed code. It is a library for specifying the transducers (individual nodes) that +> DFIR itself does not generate distributed code. It is a library for specifying the processes (individual nodes) that > participate in a distributed system. > > In the [Hydro Project](https://hydro.run), higher-level languages are being built on top of DFIR to generate -> distributed code in the form of multiple transducers. -> Meanwhile, you can use DFIR to write your own distributed code, by writing individual transducers that work together, +> distributed code in the form of multiple processes. +> Meanwhile, you can use DFIR to write your own distributed code, by writing individual processes that work together, > and deploying them manually or with a tool like [Hydroplane](https://github.com/hydro-project/hydroplane). See the [Hydro Ecosystem](../ecosystem) for more on this. ### So how might a human write distributed systems with DFIR? As an illustration of how you can work at the DFIR layer, consider the [Chat Server example](https://github.com/hydro-project/hydro/tree/main/dfir_rs/examples/chat). If you run that binary -with the command-line argument `--role server` it will start a single transducer that is responsible for a chat server: receiving +with the command-line argument `--role server` it will start a single process that is responsible for a chat server: receiving membership requests and messages from clients, and forwarding messages from individual clients to all other clients. -If you run that binary with the argument `--role client` it will start a transducer that is responsible for a chat client, which +If you run that binary with the argument `--role client` it will start a process that is responsible for a chat client, which forwards chat messages from stdin to the server, and prints out messages sent by the server. As a distributed system, the chat -service would typically consist of many client transducers and a single server transducer. +service would typically consist of many client processes and a single server process. -Note that this is an example of an extremely simple distributed system in a "star" or "hub-and spokes" topology: the multiple client transducers are completely independent of each other, and each talks only with the central server transducer. +Note that this is an example of an extremely simple distributed system in a "star" or "hub-and spokes" topology: the multiple client processes are completely independent of each other, and each talks only with the central server process.
@@ -65,7 +65,7 @@ graph TD; ```
- If we wanted something more interesting, we could consider deploying a cluster of multiple server transducers, say for fault tolerance or geo-distribution, perhaps like this: + If we wanted something more interesting, we could consider deploying a cluster of multiple server processes, say for fault tolerance or geo-distribution, perhaps like this:
diff --git a/docs/docs/dfir/concepts/life_and_times.md b/docs/docs/dfir/concepts/life_and_times.md index 41adf78b2a4..d35ba6b7fba 100644 --- a/docs/docs/dfir/concepts/life_and_times.md +++ b/docs/docs/dfir/concepts/life_and_times.md @@ -2,22 +2,22 @@ sidebar_position: 1 --- -# The Life and Times of a DFIR Transducer +# The Life and Times of a DFIR Process Time is a fundamental concept in many distributed systems. DFIR's model of time is very simple, but also powerful enough to serve as a building block for more complex models of time. -Like most reactive services, we can envision a DFIR transducer running as an unbounded loop that is managed -by the runtime library. Each iteration of the transducer's loop is called a *tick*. Associated with the transducer is +Like most reactive services, we can envision a DFIR process running as an unbounded loop that is managed +by the runtime library. Each iteration of the process's loop is called a *tick*. Associated with the process is a *clock* value (accessible via the `.current_tick()` method), which tells you how many ticks were executed -by this transducer prior to the current tick. Each transducer produces totally ordered, sequentially increasing clock values, -which you can think of as the "local logical time" at the transducer. +by this process prior to the current tick. Each process produces totally ordered, sequentially increasing clock values, +which you can think of as the "local logical time" at the process. -The transducer's main loop works as follows: +The process's main loop works as follows: 1. Given events and messages buffered from the operating system, ingest a batch of data items and deliver them to the appropriate `source_xxx` operators in the DFIR spec. 2. Run the DFIR spec. If the spec has cycles, continue executing it until it reaches a "fixpoint" on the current batch; i.e. it no longer produces any new data anywhere in the flow. Along the way, any data that appears in an outbound channel is streamed to the appropriate destination. 3. Once the spec reaches fixpoint and messages have all been sent, advance the local clock and then start the next tick. -The transducer's main loop is shown in the following diagram: +The process's main loop is shown in the following diagram: ```mermaid %%{init: {'theme':'neutral'}}%% @@ -25,7 +25,7 @@ flowchart LR subgraph external[External] network>"messages\n& events"\nfa:fa-telegram]-->buffer[[buffer]] end - subgraph transducer[Transducer Loop] + subgraph process[Process Loop] buffer-->ingest>ingest a batch\nof data]-->loop(((run DFIR\nspec to fixpoint\nfa:fa-cog)))-->stream[stream out\nmsgs & events\nfa:fa-telegram]-->clock((advance\nclock\nfa:fa-clock-o))-- new tick! -->ingest end style stream fill:#0fa,stroke:#aaa,stroke-width:2px,stroke-dasharray: 5 5 @@ -35,4 +35,4 @@ flowchart LR linkStyle 5 stroke:red,stroke-width:4px,color:red; ``` -In sum, an individual transducer advances sequentially through logical time; in each tick of its clock it ingests a batch of data from its inbound channels, executes the DFIR spec, and sends any outbound data to its outbound channels. \ No newline at end of file +In sum, an individual process advances sequentially through logical time; in each tick of its clock it ingests a batch of data from its inbound channels, executes the DFIR spec, and sends any outbound data to its outbound channels. \ No newline at end of file diff --git a/docs/docs/dfir/concepts/stratification.md b/docs/docs/dfir/concepts/stratification.md index 50900d709cc..063d766f67b 100644 --- a/docs/docs/dfir/concepts/stratification.md +++ b/docs/docs/dfir/concepts/stratification.md @@ -3,12 +3,12 @@ sidebar_position: 3 --- # Streaming, Blocking and Stratification -Many DFIR operators (e.g. `map`, `filter` and `join`) work in a streaming fashion. Streaming operators process data as it arrives, generating outputs in the midst of processing inputs. If you restrict yourself to operators that work in this streaming fashion, then your transducer may start sending data across the network mid-tick, even while it is still consuming the data in the input batch. +Many DFIR operators (e.g. `map`, `filter` and `join`) work in a streaming fashion. Streaming operators process data as it arrives, generating outputs in the midst of processing inputs. If you restrict yourself to operators that work in this streaming fashion, then your process may start sending data across the network mid-tick, even while it is still consuming the data in the input batch. But some operators are blocking, and must wait for all their input data to arrive before they can produce any output data. For example, a `sort` operator must wait for all its input data to arrive before it can produce a single output value. After all, the lowest value may be the last to arrive! ## Examples of Blocking Operators -The discussion above should raise questions in your mind. What do we mean by "all the input data" in a long-running service? We don't want to wait until the end of time—this is one reason we break time up into discrete "ticks" at each transducer. So when we say that a blocking operator waits for "all the input data", we mean "all the input data in the current tick". +The discussion above should raise questions in your mind. What do we mean by "all the input data" in a long-running service? We don't want to wait until the end of time—this is one reason we break time up into discrete "ticks" at each process. So when we say that a blocking operator waits for "all the input data", we mean "all the input data in the current tick". Consider the simple statement below, which receives data from a network source each tick, sorts that tick's worth of data, and prints it to stdout: ```rust,ignore @@ -59,14 +59,14 @@ end The `difference` operators is one with inputs of two different types. It is supposed to output all the items from its `pos` input that do not appear in its `neg` input. To achieve that, the `neg` input must be blocking, but the `pos` input can stream. Blocking on the `neg` input ensures that if the operator streams an output from the `pos` input, it will never need to retract that output. -Given these examples, we can refine our diagram of the DFIR transducer loop to account for stratified execution within each tick: +Given these examples, we can refine our diagram of the DFIR process loop to account for stratified execution within each tick: ```mermaid %%{init: {'theme':'neutral'}}%% flowchart LR subgraph external[External] network>"messages\n& events"\nfa:fa-telegram]-->buffer[[buffer]] end - subgraph transducer[Transducer Loop] + subgraph process[Process Loop] buffer-->ingest>"ingest a batch\nof data"]-->loop((("for each stratum in\n[0..max(strata)]\nrun stratum to fixpoint\nfa:fa-cog")))-->stream[stream out\nmsgs & events\nfa:fa-telegram]-->clock((advance\nclock\nfa:fa-clock-o))-- new tick! -->ingest end style stream fill:#0fa,stroke:#aaa,stroke-width:2px,stroke-dasharray: 5 5 @@ -88,7 +88,7 @@ The DFIR compiler performs stratification via static analysis of the DFIR spec. Given the acyclicity test, any legal DFIR program consists of a directed acyclic graph (DAG) of strata and handoffs. The strata are numbered in ascending order by assigning stratum number 0 to the "leaves" of the DAG (strata with no upstream operators), and then ensuring that each stratum is assigned a number that is one larger than any of its upstream strata. -As a DFIR operator executes, it is running on a particular transducer, in a particular tick, in a particular stratum. +As a DFIR operator executes, it is running on a particular process, in a particular tick, in a particular stratum. ### Determining whether an operator should block: Monotonicity @@ -102,6 +102,6 @@ By contrast, consider the output of a blocking operator like `difference`. The o DFIR is designed to use the monotonicity property to determine whether an operator should block. If an operator is monotone with respect to an input, that input is streaming. If an operator is non-monotone, it is blocking. -Monotonicity turns out to be particularly important for distributed systems. In particular, if all your transducers are fully monotone across ticks, then they can run in parallel without any coordination—they will always stream correct prefixes of the final outputs, and eventually will deliver the complete output. This is the positive direction of the [CALM Theorem](https://cacm.acm.org/magazines/2020/9/246941-keeping-calm/fulltext). +Monotonicity turns out to be particularly important for distributed systems. In particular, if all your processes are fully monotone across ticks, then they can run in parallel without any coordination—they will always stream correct prefixes of the final outputs, and eventually will deliver the complete output. This is the positive direction of the [CALM Theorem](https://cacm.acm.org/magazines/2020/9/246941-keeping-calm/fulltext). > In future versions of DFIR, the type system will represent monotonicity explicitly and reason about it automatically. diff --git a/docs/docs/dfir/ecosystem.md b/docs/docs/dfir/ecosystem.md index e5b45fd115d..0f3a4ab8d2e 100644 --- a/docs/docs/dfir/ecosystem.md +++ b/docs/docs/dfir/ecosystem.md @@ -10,15 +10,15 @@ A rough picture of the Hydro stack is below: Working down from the top: -- [*Hydro*](../hydro) is an end-user-facing high-level [choreographic](https://en.wikipedia.org/wiki/Choreographic_programming) [dataflow](https://en.wikipedia.org/wiki/Dataflow_programming) language. Hydro is a *global* language for programming a fleet of transducers. Programmers author dataflow pipelines that start with streams of events and data, and span boundaries across multiple `process` and (scalable) `cluster` specifications. +- [*Hydro*](../hydro) is an end-user-facing high-level [choreographic](https://en.wikipedia.org/wiki/Choreographic_programming) [dataflow](https://en.wikipedia.org/wiki/Dataflow_programming) language. Hydro is a *global* language for programming a fleet of processes. Programmers author dataflow pipelines that start with streams of events and data, and span boundaries across multiple `process` and (scalable) `cluster` specifications. - *Hydrolysis* is a compiler that translates a global Hydro spec to multiple single-threaded DFIR programs, which collectively implement the global spec. This compilation phase is currently a part of the Hydro codebase, but will evolve into a standalone optimizing compiler inspired by database query optimizers and [e-graphs](https://en.wikipedia.org/wiki/E-graph). - [DFIR and its compiler/runtime](https://github.com/hydro-project/hydro/tree/main/dfir_rs) are the subject of this book. -Where Hydro is a *global* language for programming a fleet of processes, DFIR is a *local* language for programming a single process that participates in a distributed system. More specifically, DFIR is an internal representation (IR) language and runtime library that generates the low-level Rust code for an individual transducer. As a low-level IR, DFIR is not intended for the general-purpose programmer. For most users it is intended as a readable compiler target from Hydro; advanced developers can also use it to manually program individual transducers. +Where Hydro is a *global* language for programming a fleet of processes, DFIR is a *local* language for programming a single process that participates in a distributed system. More specifically, DFIR is an internal representation (IR) language and runtime library that generates the low-level Rust code for an individual process. As a low-level IR, DFIR is not intended for the general-purpose programmer. For most users it is intended as a readable compiler target from Hydro; advanced developers can also use it to manually program individual processes. -- [HydroDeploy](../deploy) is a service for launching DFIR transducers on a variety of platforms. +- [HydroDeploy](../deploy) is a service for launching DFIR processes on a variety of platforms. - Hydro also supports *Deterministic Simulation Testing* to aid in debugging distributed programs. Documentation on this feature is forthcoming. diff --git a/docs/docs/dfir/faq.md b/docs/docs/dfir/faq.md index 99cd641e6bb..365dab22047 100644 --- a/docs/docs/dfir/faq.md +++ b/docs/docs/dfir/faq.md @@ -7,8 +7,8 @@ sidebar_position: 9 **A:** DFIR is designed as a substrate for building a wide range of distributed systems; traditional software dataflow systems like the ones mentioned are targeted more narrowly at large-scale data processing. As such, DFIR differs from these systems in several ways: -First, DFIR is a lower-level abstraction than the systems mentioned. DFIR adopts a dataflow abstraction for specifying a transducer -running on a *single core*; one implementes a distributed system out of multiple DFIR transducers. By contrast, Spark, Flink and MapReduce are +First, DFIR is a lower-level abstraction than the systems mentioned. DFIR adopts a dataflow abstraction for specifying a process +running on a *single core*; one implementes a distributed system out of multiple DFIR processes. By contrast, Spark, Flink and MapReduce are distributed systems, which make specific choices for implementing issues like scheduling, fault tolerance, and so on. Second, the systems mentioned above were designed specifically for distributed data processing tasks. @@ -24,13 +24,13 @@ like Paxos or Two-Phase Commit with low latency. #### Q: What model of parallelism does DFIR use? SPMD? MPMD? Actors? MPI? **A:** As a substrate for building individual nodes in a distributed systems, DFIR does not make any assumptions about the model of parallelism used. One can construct a distributed system out of DFIR -transducers that use any model of parallelism, including +processes that use any model of parallelism, including [SPMD](https://en.wikipedia.org/wiki/Flynn%27s_taxonomy#Single_program,_multiple_data_streams_(SPMD)), [MPMD](https://en.wikipedia.org/wiki/Flynn%27s_taxonomy#Multiple_programs,_multiple_data_streams_(MPMD)), [Actors](https://en.wikipedia.org/wiki/Actor_model) or [MPI Collective Operations](https://en.wikipedia.org/wiki/Collective_operation). -DFIR provides abstractions for implementing individual transducers to handle inputs, computing, and emitting outputs in +DFIR provides abstractions for implementing individual processes to handle inputs, computing, and emitting outputs in a general-purpose manner. -That said, it is common practice to deploy many instance of the same DFIR transducer; most distributed systems built in DFIR +That said, it is common practice to deploy many instance of the same DFIR process; most distributed systems built in DFIR therefore have some form of SPMD parallelism. (This is true of most distributed systems in general.) \ No newline at end of file diff --git a/docs/docs/dfir/quickstart/setup.md b/docs/docs/dfir/quickstart/setup.md index 29eac7a4cbc..a5150d8821d 100644 --- a/docs/docs/dfir/quickstart/setup.md +++ b/docs/docs/dfir/quickstart/setup.md @@ -83,10 +83,10 @@ simply echoes back the messages you sent it; it also implements a client to test We will replace the code in that example with our own, but it's a good idea to run it first to make sure everything is working. :::note -We call a running DFIR binary a *transducer*. +We call a running DFIR binary a *process*. ::: -Start by running a transducer for the server: +Start by running a process for the server: ```console #shell-command-next-line cargo run -- --role server @@ -94,7 +94,7 @@ Listening on 127.0.0.1: Server live! ``` -Take note of the server's port number, and in a separate terminal, start a client transducer: +Take note of the server's port number, and in a separate terminal, start a client process: ```console #shell-command-next-line cd diff --git a/docs/docs/hydro/dataflow-programming.mdx b/docs/docs/hydro/dataflow-programming.mdx index cbb3c69f9db..4fd51a2665a 100644 --- a/docs/docs/hydro/dataflow-programming.mdx +++ b/docs/docs/hydro/dataflow-programming.mdx @@ -5,9 +5,9 @@ sidebar_position: 1 # Dataflow Programming Hydro uses a dataflow programming model, which will be familiar if you have used APIs like Rust iterators. Instead of using RPCs or async/await to describe distributed computation, Hydro instead uses **asynchronous streams**, which represent data arriving over time. Streams can represent a series of asynchronous events (e.g. inbound network requests) or a sequence of data items. -Programs in Hydro describe how to **transform** entire collections of data using operators such as `map` (transforming elements one by one), `fold` (aggregating elements into a single value), or `join` (combining elements from multiple streams on matching keys). +Programs in Hydro describe how to **transform** streams and other collections of data using operators such as `map` (transforming elements one by one), `fold` (aggregating elements into a single value), or `join` (combining elements from multiple streams on matching keys). -If you are familiar with Spark, Flink or Pandas, you will find Hydro syntax familiar. However, note well that the semantics for asynchronous streams in Hydro differ significantly from bulk analytics systems like those above. In particular, Hydro uses the type system to distinguish between bounded streams (originating from finite data) and unbounded streams (originated from asynchronous input). Moreover, Hydro is designed to handle asynchronous streams of small, independent events very efficiently. +If you are familiar with Spark, Flink or Pandas, you will find Hydro syntax familiar. However, note well that the semantics for asynchronous streams in Hydro differ significantly from bulk analytics systems like those above. In particular, Hydro uses the type system to distinguish between bounded streams (originating from finite data collections) and unbounded streams (originated from asynchronous input). Moreover, Hydro is designed to handle asynchronous streams of small, independent events very efficiently. The core of the Hydro API are the following types and their methods: - [Stream](https://hydro.run/rustdoc/hydro_lang/stream/struct.Stream) diff --git a/docs/docs/hydro/index.mdx b/docs/docs/hydro/index.mdx index 777e98fe233..5cb770efceb 100644 --- a/docs/docs/hydro/index.mdx +++ b/docs/docs/hydro/index.mdx @@ -5,7 +5,7 @@ sidebar_position: 0 # Introduction Hydro is a high-level distributed programming framework for Rust powered by the [DFIR runtime](../dfir/index.mdx). Unlike traditional architectures such as actors or RPCs, Hydro offers _choreographic_ APIs, where expressions and functions can describe computation that takes place across many locations. It also integrates with [Hydro Deploy](../deploy/index.md) to make it easy to deploy and run Hydro programs to the cloud. -Hydro uses a two-stage compilation approach. Hydro programs are standard Rust programs, which first run on the developer's laptop to generate a _deployment plan_. This plan is then compiled to individual binaries for each machine in the distributed system (enabling zero-overhead abstractions), and are then deployed to the cloud using the generated plan along with specifications of cloud resources. +Hydro uses a two-stage compilation approach. Hydro programs are standard Rust programs, which first run on the developer's laptop to generate a _deployment plan_. This plan is then compiled to individual binaries for each machine in the distributed system (enabling zero-overhead abstractions), which are then deployed to the cloud using the generated plan along with specifications of cloud resources. Hydro has been used to write a variety of high-performance distributed systems, including implementations of classic distributed protocols such as two-phase commit and Paxos. Work is ongoing to develop a distributed systems standard library that will offer these protocols and more as reusable components. diff --git a/docs/docs/hydro/live-collections/determinism.md b/docs/docs/hydro/live-collections/determinism.md index f632f777214..32fdee95d58 100644 --- a/docs/docs/hydro/live-collections/determinism.md +++ b/docs/docs/hydro/live-collections/determinism.md @@ -20,7 +20,7 @@ Much existing literature in distributed systems focuses on consistency levels su ::: ## Unsafe Operations in Hydro -All **safe** APIs in Hydro (the ones you can call regularly in Rust), guarantee determinism. But oftentimes it is necessary to do something non-deterministic, like generate events at a fixed time interval or split an input into arbitrarily sized batches. +All **safe** APIs in Hydro (the ones you can call regularly in Rust) guarantee determinism. But often it is necessary to do something non-deterministic, like generate events at a fixed wall-clock-time interval, or split an input into arbitrarily sized batches. Hydro offers APIs for such concepts behind an **`unsafe`** guard. This keyword is typically used to mark Rust functions that may not be memory-safe, but we reuse this in Hydro to mark non-deterministic APIs. @@ -41,7 +41,7 @@ unsafe { When writing a function with Hydro that involves `unsafe` code, it is important to be extra careful about whether the non-determinism is exposed externally. In some applications, a utility function may involve local non-determinism (such as sending retries), but not expose it outside the function (via deduplication). -But other utilities may expose the non-determinism, in which case they should be marked `unsafe` as well. If the function is public, Rust will require you to put a `# Safety` section in its documentation explain the non-determinism. +But other utilities may expose the non-determinism, in which case they should be marked `unsafe` as well. If the function is public, Rust will require you to put a `# Safety` section in its documentation to explain the non-determinism. ```rust # use hydro_lang::*; @@ -65,12 +65,12 @@ unsafe fn print_samples( ``` ## User-Defined Functions -Another source of potential non-determinism is user-defined functions, such as those provided to `map` or `filter`. Hydro allows for arbitrary Rust functions to be called inside these closures, so it is possible to introduce non-determinism which will not be checked by the compiler. +Another source of potential non-determinism is user-defined functions, such as those provided to `map` or `filter`. Hydro allows for arbitrary Rust functions to be called inside these closures, so it is possible to introduce non-determinism that will not be checked by the compiler. In general, avoid using APIs like random number generators inside transformation functions unless that non-determinism is explicitly documented somewhere. :::info -To help avoid such bugs, we are working on ways to use formal verification tools (such as [Kani](https://model-checking.github.io/kani/)) to check arbitrary Rust code for properties such as determinism and more. But this remains active research for now and is not yet available. +To help avoid such bugs, we are working on ways to use formal verification tools (such as [Kani](https://model-checking.github.io/kani/)) to check arbitrary Rust code for properties such as determinism and more. This remains active research for now and is not yet available. ::: diff --git a/docs/docs/hydro/quickstart/clusters.mdx b/docs/docs/hydro/quickstart/clusters.mdx index 52869db3332..17772809b74 100644 --- a/docs/docs/hydro/quickstart/clusters.mdx +++ b/docs/docs/hydro/quickstart/clusters.mdx @@ -12,13 +12,13 @@ So far, we have looked at distributed systems where each process is running a di ## Dataflow with Clusters Just like we use the `Process` type to represent a virtual handle to a single node, we can use the **`Cluster`** type to represent a handle to a **set of nodes** (with size unknown at compile-time). -A `Stream` located on a `Cluster` can be thought of as SIMD-style programming, where each cluster member executes the same operators but on different pieces of data. +A `Stream` located on a `Cluster` can be thought of as [SPMD](https://en.wikipedia.org/wiki/Single_program,_multiple_data)-style programming, where each cluster member executes the same operators but on different streams of data. To start, we set up a new module in `src/first_ten_cluster.rs` with a dataflow program that takes in a `Process` for a leader and `Cluster` for a set of workers. {getLines(firstTenClusterSrc, 1, 6)} -We start by materializing a stream of numbers on the `leader`, as before. But rather than sending the stream to a single process, we will instead _distribute_ the data to each member of the cluster using `round_robin_bincode`. This API places data on a `cluster` in a round-robin fashion by using the order of elements to determine which cluster member each element is sent to. +We start by materializing a stream of numbers on the `leader`, as before. But rather than sending the stream to a single process, we will instead _distribute_ the data to each member of the cluster using `round_robin_bincode`. This API partitions (shards) data across a `cluster` in a round-robin fashion by using the order of elements to determine which cluster member each element is sent to. :::info @@ -32,12 +32,12 @@ On each cluster member, we will then do some work to transform the data (using ` {getLines(firstTenClusterSrc, 10, 11)} -Finally, we will send the data back to the leader. We achieve this using a variant of the APIs from before: `send_bincode_interleaved`. If we used `send_bincode`, we would get a stream of (cluster ID, data) tuples. Since it is a common pattern to ignore the IDs, `send_bincode_interleaved` is available as a helper. +Finally, we will send the data back to the leader. We achieve this using a variant of the APIs from before: `send_bincode_interleaved`. If we used `send_bincode`, we would get a stream of `(cluster ID, data)` tuples. Since it is a common pattern to ignore the IDs, `send_bincode_interleaved` is available as a helper. {getLines(firstTenClusterSrc, 12, 14)} ## Deploying Clusters -Deployment scripts are similar to before, except that when provisioning a cluster we provide a list of deployment hosts rather than a single one. In our example, we'll launch 4 nodes for the cluster by creating a `Vec` of 4 localhost instances. +Deployment scripts are similar to before, except that when provisioning a cluster we provide a *list* of deployment hosts rather than a single one. In our example, we'll launch 4 nodes for the cluster by creating a `Vec` of 4 localhost instances. {highlightLines(firstTenClusterExample, [14])} @@ -70,4 +70,4 @@ cargo run --example first_ten_cluster [hydro_template::first_ten_cluster::Leader (process 0)] 14 ``` -You'll notice the round-robin distribution in action here, as each cluster log is tagged with the ID of the member (e.g. `/ 0`). In our deployment, we are sending data round-robin across 4 members of the cluster, numbered `0` through `3`. Hence cluster member `0` receives values `0`, `4`, `8` (corresponding to the highlighted lines), member `1` receives values `1`, `5`, `9`, and so on. +You'll notice the round-robin distribution in action here, as each cluster log is tagged with the ID of the member (e.g. `/ 0`). In our deployment, we are sending the even numbers round-robin across 4 members of the cluster, numbered `0` through `3`. Hence cluster member `0` receives values `0`, `8`, `16` (corresponding to the highlighted lines), member `1` receives values `2`, `10`, `18`, and so on. diff --git a/docs/docs/hydro/quickstart/first-dataflow.mdx b/docs/docs/hydro/quickstart/first-dataflow.mdx index 286c4879d9f..7cda095dd41 100644 --- a/docs/docs/hydro/quickstart/first-dataflow.mdx +++ b/docs/docs/hydro/quickstart/first-dataflow.mdx @@ -24,7 +24,7 @@ cargo generate gh:hydro-project/hydro-template ::: ## Writing a Dataflow -In Hydro, streams are attached to a **location**, which is either a virtual handle to a **single machine** (the **`Process`** type) or **set of machines** (the **`Cluster`** type). A single piece of Hydro code can describe a distributed program that runs across multiple processes and clusters, each with their own local state and data. +In Hydro, streams are attached to a **location**, which is a virtual handle to either a **single machine** (the **`Process`** type) or **set of machines** (the **`Cluster`** type). A single piece of Hydro code can describe a distributed program that runs across multiple processes and clusters, each with their own local state and data. We'll write our first dataflow in `src/first_ten.rs`. This program will run on a single machine, so we take a single `&Process` parameter. We can materialize a stream on this machine using `process.source_iter` (which emits values from a static in-memory collection), and then print out the values using `for_each`. @@ -46,13 +46,13 @@ To run a Hydro program, we need to write some deployment configuration in `examp :::tip -When using Hydro, we will *always* place our deployment scripts in the `examples` directory. This is required because deployment is done via [Hydro Deploy](../../deploy/index.md) which is a _dev dependency_---i.e. not part of the dependencies used for generating binaries (but available to programs in the `examples` directory). +When using Hydro, we will *always* place our deployment scripts in the `examples` directory. This is required because deployment is done via [Hydro Deploy](../../deploy/index.md) which is a _dev dependency_---i.e. not part of the dependencies used for generating binaries, but available to programs in the `examples` directory. ::: {firstTenExampleSrc} -First, we initialize a new [Hydro Deploy](../../deploy/index.md) deployment with `Deployment::new()`. Then, we create a `FlowBuilder` which will store the entire dataflow program and manage its compilation. +First, we initialize a new [Hydro Deploy](../../deploy/index.md) deployment with `Deployment::new()`. Then, we create a `FlowBuilder` that will store the entire dataflow program and manage its compilation. To create a `Process`, we call `flow.process()`. After the dataflow has been created (by invoking the `hydro_template::first_ten::first_ten` function we created earlier), we must map each instantiated `Process` to a deployment target using `flow.with_process` (in this case we deploy to localhost). diff --git a/docs/docs/hydro/stageleft.mdx b/docs/docs/hydro/stageleft.mdx index a4d50c394b2..1027df8f2c1 100644 --- a/docs/docs/hydro/stageleft.mdx +++ b/docs/docs/hydro/stageleft.mdx @@ -7,4 +7,11 @@ import StageleftDocs from '../../../stageleft/README.md' Under the hood, Hydro uses a library called Stageleft to power the `q!` macro and code generation logic. The following docs, from the Stageleft README, outline the core architecture of Stageleft. +:::note + +Hydro programmers need not understand how to use Stageleft; this information is provided here for readers +interested in how its `q!` macro works. + +::: + diff --git a/docs/package-lock.json b/docs/package-lock.json index 046e857f96d..3901bb33654 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -7262,9 +7262,10 @@ } }, "node_modules/express": { - "version": "4.21.1", - "resolved": "https://registry.npmjs.org/express/-/express-4.21.1.tgz", - "integrity": "sha512-YSFlK1Ee0/GC8QaO91tHcDxJiE/X4FbpAyQWkxAvG6AXCuR65YzK8ua6D9hvi/TzUfZMpc+BwuM1IPw8fmQBiQ==", + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/express/-/express-4.21.2.tgz", + "integrity": "sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA==", + "license": "MIT", "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", @@ -7285,7 +7286,7 @@ "methods": "~1.1.2", "on-finished": "2.4.1", "parseurl": "~1.3.3", - "path-to-regexp": "0.1.10", + "path-to-regexp": "0.1.12", "proxy-addr": "~2.0.7", "qs": "6.13.0", "range-parser": "~1.2.1", @@ -7300,6 +7301,10 @@ }, "engines": { "node": ">= 0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" } }, "node_modules/express/node_modules/content-disposition": { @@ -7327,9 +7332,10 @@ "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" }, "node_modules/express/node_modules/path-to-regexp": { - "version": "0.1.10", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.10.tgz", - "integrity": "sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w==" + "version": "0.1.12", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz", + "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==", + "license": "MIT" }, "node_modules/express/node_modules/range-parser": { "version": "1.2.1", @@ -11987,15 +11993,16 @@ } }, "node_modules/nanoid": { - "version": "3.3.7", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", - "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==", + "version": "3.3.8", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.8.tgz", + "integrity": "sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==", "funding": [ { "type": "github", "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "bin": { "nanoid": "bin/nanoid.cjs" },