Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Quic: Ack overflow probes, 10x idle_timeout buffer #4188

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/app/fdctl/config/default.toml
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,7 @@ dynamic_port_range = "8900-9000"
idle_timeout_millis = 10000

# Max delay for outgoing ACKs.
ack_delay_millis = 50
ack_delay_millis = 25
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we changing the max ack delay in the default.toml?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using this branch mostly for testing rn, but just worth noting 25 is what the QUIC spec recommends. Directionally, this may contribute to our under-delivery issue (although unlikely)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we have an under-delivery issue :-)
But PRs that add tests or change testing behavior should not change production config, as a general rule.


# QUIC retry is a feature to combat new connection request
# spamming. See rfc9000 8.1.2 for more details. This flag
Expand Down
21 changes: 18 additions & 3 deletions src/waltz/quic/fd_quic.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ fd_quic_init( fd_quic_t * quic ) {
}
state->svc_delay[ FD_QUIC_SVC_INSTANT ] = 0UL;
state->svc_delay[ FD_QUIC_SVC_ACK_TX ] = quic->config.ack_delay;
state->svc_delay[ FD_QUIC_SVC_WAIT ] = quic->config.idle_timeout;
state->svc_delay[ FD_QUIC_SVC_WAIT ] = quic->config.idle_timeout * 10;

/* Check TX AIO */

Expand Down Expand Up @@ -1368,6 +1368,7 @@ fd_quic_send_retry( fd_quic_t * quic,
uint dst_ip_addr,
ushort dst_udp_port ) {

FD_DTRACE_PROBE_3( fd_quic_send_retry, fd_ulong_load_8(odcid->conn_id), new_conn_id, dst_ip_addr );
fd_quic_state_t * state = fd_quic_get_state( quic );

ulong expire_at = state->now + quic->config.retry_ttl;
Expand Down Expand Up @@ -1755,6 +1756,7 @@ fd_quic_handle_v1_initial( fd_quic_t * quic,
}

/* update last activity */
FD_DTRACE_PROBE_2( fd_quic_initial_touch_last_activity, conn->our_conn_id, state->now );
conn->last_activity = state->now;
conn->flags &= ~( FD_QUIC_CONN_FLAGS_PING_SENT | FD_QUIC_CONN_FLAGS_PING );

Expand Down Expand Up @@ -1906,6 +1908,7 @@ fd_quic_handle_v1_handshake(
}

/* update last activity */
FD_DTRACE_PROBE_2( fd_quic_hs_touch_last_activity, conn->our_conn_id, fd_quic_get_state( quic )->now );
conn->last_activity = fd_quic_get_state( quic )->now;
conn->flags &= ~( FD_QUIC_CONN_FLAGS_PING_SENT | FD_QUIC_CONN_FLAGS_PING );

Expand Down Expand Up @@ -2161,6 +2164,7 @@ fd_quic_handle_v1_one_rtt( fd_quic_t * quic,
}

/* update last activity */
FD_DTRACE_PROBE_2( fd_quic_one_rtt_touch_last_activity, conn->our_conn_id, fd_quic_get_state( quic )->now );
conn->last_activity = fd_quic_get_state( quic )->now;

/* update expected packet number */
Expand All @@ -2169,7 +2173,6 @@ fd_quic_handle_v1_one_rtt( fd_quic_t * quic,
return tot_sz;
}


/* process v1 quic packets
returns number of bytes consumed, or FD_QUIC_PARSE_FAIL upon error */
ulong
Expand Down Expand Up @@ -2215,10 +2218,14 @@ fd_quic_process_quic_packet_v1( fd_quic_t * quic,
if( dcid.sz == FD_QUIC_CONN_ID_SZ ) {
conn = fd_quic_conn_query( state->conn_map, fd_ulong_load_8( dcid.conn_id ) );
}

fd_quic_conn_id_t scid = fd_quic_conn_id_new( long_hdr->src_conn_id, long_hdr->src_conn_id_len );

uchar long_packet_type = fd_quic_h0_long_packet_type( *cur_ptr );

FD_DTRACE_PROBE_3( fd_quic_process_quic_packet_v1_long_hdr, long_packet_type, fd_ulong_load_8(dcid.conn_id),
conn ? conn->our_conn_id : 0 );

/* encryption level matches that of TLS */
pkt->enc_level = long_packet_type; /* V2 uses an indirect mapping */

Expand Down Expand Up @@ -2259,6 +2266,7 @@ fd_quic_process_quic_packet_v1( fd_quic_t * quic,

/* find connection id */
ulong dst_conn_id = fd_ulong_load_8( cur_ptr+1 );
FD_DTRACE_PROBE_1( fd_quic_process_quic_packet_v1_short_hdr, dst_conn_id );
conn = fd_quic_conn_query( state->conn_map, dst_conn_id );
if( FD_UNLIKELY( !conn ) ) {
FD_DEBUG( FD_LOG_DEBUG(( "one_rtt failed: no connection found" )) );
Expand Down Expand Up @@ -2706,7 +2714,8 @@ fd_quic_tls_cb_peer_params( void * context,

/* set the max_idle_timeout to the min of our and peer max_idle_timeout */
if( peer_tp->max_idle_timeout ) {
conn->idle_timeout = fd_ulong_min( (ulong)(1e6) * peer_tp->max_idle_timeout, conn->idle_timeout );
/* 10 for margin of safety */
conn->idle_timeout = 10*fd_ulong_min( (ulong)(1e6) * peer_tp->max_idle_timeout, conn->idle_timeout );
}

/* set ack_delay_exponent so we can properly interpret peer's ack_delays
Expand Down Expand Up @@ -2863,9 +2872,13 @@ fd_quic_svc_poll( fd_quic_t * quic,
"... the connection is silently closed and its state is discarded
when it remains idle for longer than the minimum of the
max_idle_timeout value advertised by both endpoints." */
if( FD_UNLIKELY( !fd_quic_conn_query( state->conn_map, conn->our_conn_id ) ) ) {
FD_DTRACE_PROBE_1( fd_quic_timeout_missing_conn, conn->our_conn_id );
}
FD_DEBUG( FD_LOG_WARNING(( "%s conn %p conn_idx: %u closing due to idle timeout (%g ms)",
conn->server?"SERVER":"CLIENT",
(void *)conn, conn->conn_idx, (double)conn->idle_timeout / 1e6 )); )
FD_DTRACE_PROBE_1( fd_quic_conn_idle_timeout, conn->our_conn_id );

conn->state = FD_QUIC_CONN_STATE_DEAD;
quic->metrics.conn_timeout_cnt++;
Expand Down Expand Up @@ -3855,6 +3868,8 @@ fd_quic_conn_tx( fd_quic_t * quic,
pkt_meta->pkt_number++;
}

FD_DTRACE_PROBE_3( fd_quic_conn_tx, conn_id, enc_level, pkt_number );

if( enc_level == fd_quic_enc_level_appdata_id ) {
/* short header must be last in datagram
so send in packet immediately */
Expand Down
5 changes: 5 additions & 0 deletions src/waltz/quic/fd_quic_ack_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,13 @@ fd_quic_ack_pkt( fd_quic_ack_gen_t * gen,

/* Attempt to allocate another ACK queue entry */
if( gen->head - gen->tail >= FD_QUIC_ACK_QUEUE_CNT ) {
FD_DTRACE_PROBE_3( fd_quic_ack_tx_overflow, enc_level, pkt_number, now );
FD_DEBUG( FD_LOG_DEBUG(( "ACK queue overflow! (excessive reordering)" )); )
return FD_QUIC_ACK_TX_ENOSPC;
}

/* Start new pending ACK */
FD_DTRACE_PROBE_3( fd_quic_ack_range_new, enc_level, pkt_number, now );
FD_ACK_DEBUG( FD_LOG_DEBUG(( "gen=%p queue ACK for enc=%u pkt_num=%lu seq=%u",
(void *)gen, enc_level, pkt_number, gen->head )); )
fd_quic_ack_t * next_ack = fd_quic_ack_queue_ele( gen, gen->head );
Expand Down Expand Up @@ -105,6 +107,7 @@ fd_quic_gen_ack_frames( fd_quic_ack_gen_t * gen,
for( ; gen->tail != gen->head; gen->tail++ ) {
fd_quic_ack_t * ack = fd_quic_ack_queue_ele( gen, gen->tail );
if( ack->enc_level != enc_level ) {
FD_DTRACE_PROBE_2( fd_quic_ack_tx_enc_level_break, ack->enc_level, enc_level );
FD_ACK_DEBUG( FD_LOG_DEBUG(( "need encryption level %u for ACKs but have %u", ack->enc_level, enc_level )); )
break;
}
Expand All @@ -125,6 +128,7 @@ fd_quic_gen_ack_frames( fd_quic_ack_gen_t * gen,
FD_DEBUG( FD_LOG_DEBUG(( "insufficient buffer space to send ACK" )); )
break;
}
FD_DTRACE_PROBE_2( fd_quic_ack_frame_created, ack_frame.largest_ack, ack_frame.first_ack_range );
payload_ptr += frame_sz;
FD_ACK_DEBUG( FD_LOG_DEBUG(( "gen=%p sending ACK enc=%u range=[%lu,%lu) seq=%u sz=%lu",
(void *)gen, enc_level, ack->pkt_number.offset_lo, ack->pkt_number.offset_hi, gen->tail, frame_sz )); )
Expand All @@ -134,6 +138,7 @@ fd_quic_gen_ack_frames( fd_quic_ack_gen_t * gen,
if( gen->head == gen->tail ) {
gen->is_elicited = 0;
} else {
FD_DTRACE_PROBE_1( fd_quic_ack_not_all_frames_flushed, gen->head - gen->tail );
FD_ACK_DEBUG( FD_LOG_DEBUG(( "Not all ACK frames were flushed" )); )
}

Expand Down
2 changes: 1 addition & 1 deletion src/waltz/quic/tests/fuzz_quic_wire.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ LLVMFuzzerTestOneInput( uchar const * data,

/* Simulate conn timeout */
while( state->svc_queue[ FD_QUIC_SVC_WAIT ].head != UINT_MAX ) {
ulong idle_timeout_ts = conn->last_activity + quic->config.idle_timeout + 1UL;
ulong idle_timeout_ts = conn->last_activity + 10*quic->config.idle_timeout + 1UL;
fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, state->svc_queue[ FD_QUIC_SVC_WAIT ].head );

/* Idle timeouts should not be scheduled significantly late */
Expand Down
Loading