From fa87745f57abf648ec1107016a1e2f67d8e884f1 Mon Sep 17 00:00:00 2001
From: Tomer Cabouly
Date: Tue, 10 Dec 2024 14:20:07 +0200
Subject: [PATCH] issue: 4183221 propagate socket errors

In case an ECQE (error completion) is received on the TX path, set the
socket into a closing state. Effectively this sends a TCP RST.

Signed-off-by: Tomer Cabouly
---
 src/core/dev/cq_mgr_tx.cpp       |  9 ++++++
 src/core/dev/hw_queue_tx.cpp     | 47 ++++++++++++++++++--------------
 src/core/dev/hw_queue_tx.h       | 16 +++++++----
 src/core/dev/ring.h              |  5 ++--
 src/core/dev/ring_bond.cpp       |  4 +--
 src/core/dev/ring_bond.h         |  2 +-
 src/core/dev/ring_simple.cpp     | 16 ++++++-----
 src/core/dev/ring_simple.h       |  5 ++--
 src/core/dev/ring_tap.cpp        | 13 +++++----
 src/core/dev/ring_tap.h          |  8 +++---
 src/core/proto/dst_entry.h       | 26 ++++++++++++++++--
 src/core/proto/dst_entry_tcp.cpp |  6 ++--
 src/core/proto/dst_entry_tcp.h   | 12 ++++----
 src/core/proto/dst_entry_udp.cpp |  4 +--
 src/core/proto/dst_entry_udp.h   |  4 +--
 src/core/sock/sockinfo_tcp.cpp   |  3 +-
 src/core/sock/sockinfo_udp.cpp   |  2 +-
 17 files changed, 114 insertions(+), 68 deletions(-)

diff --git a/src/core/dev/cq_mgr_tx.cpp b/src/core/dev/cq_mgr_tx.cpp
index b3ea63367..6835a898a 100644
--- a/src/core/dev/cq_mgr_tx.cpp
+++ b/src/core/dev/cq_mgr_tx.cpp
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include "lwip/tcp.h"
 #include "ring_simple.h"
 #include "hw_queue_tx.h"

@@ -214,6 +215,14 @@ int cq_mgr_tx::poll_and_process_element_tx(uint64_t *p_cq_poll_sn)
     if (unlikely(cqe->op_own & 0x80) && is_error_opcode(cqe->op_own >> 4)) {
         // m_p_cq_stat->n_tx_cqe_error++; Future counter
         log_cqe_error(cqe);
+
+        tcp_pcb *pcb = m_hqtx_ptr->m_sq_wqe_idx_to_prop[index].pcb;
+
+        cq_logwarn("closing %p", pcb);
+        const auto tcp_state = get_tcp_state(pcb);
+        if (tcp_state != CLOSING && tcp_state != CLOSED) {
+            TCP_EVENT_ERR(pcb->errf, pcb->my_container, ERR_RST);
+        }
     }

     handle_sq_wqe_prop(index);
diff --git a/src/core/dev/hw_queue_tx.cpp b/src/core/dev/hw_queue_tx.cpp
index 48fe90595..162040f39 100644
--- a/src/core/dev/hw_queue_tx.cpp
+++ b/src/core/dev/hw_queue_tx.cpp
@@ -59,7 +59,7 @@
 #define OCTOWORD 16
 #define WQEBB    64

-//#define DBG_DUMP_WQE 1
+// #define DBG_DUMP_WQE 1

 #ifdef DBG_DUMP_WQE
 #define dbg_dump_wqe(_addr, _size)                                                                 \
@@ -348,8 +348,7 @@ void hw_queue_tx::release_tx_buffers()
     NOT_IN_USE(ret); // Suppress --enable-opt-log=high warning
 }

-void hw_queue_tx::send_wqe(xlio_ibv_send_wr *p_send_wqe, xlio_wr_tx_packet_attr attr, xlio_tis *tis,
-                           unsigned credits)
+void hw_queue_tx::send_wqe(xlio_ibv_send_wr *p_send_wqe, xlio_send_attr &attr, unsigned credits)
 {
     mem_buf_desc_t *p_mem_buf_desc = (mem_buf_desc_t *)p_send_wqe->wr_id;
     /* Control tx completions:
      * m_n_unsignaled_count must be zero for this time.
      */
     const bool request_comp = (p_mem_buf_desc->m_flags & mem_buf_desc_t::ZCOPY);
-    const bool skip_tx_poll = (attr & XLIO_TX_SKIP_POLL);
+    const bool skip_tx_poll = (attr.flags & XLIO_TX_SKIP_POLL);

     hwqtx_logfunc("VERBS send, unsignaled_count: %d", m_n_unsignaled_count);

-    send_to_wire(p_send_wqe, attr, request_comp, tis, credits);
+    send_to_wire(p_send_wqe, attr, request_comp, credits);

     if (!skip_tx_poll && is_signal_requested_for_last_wqe()) {
         uint64_t dummy_poll_sn = 0;
@@ -826,27 +825,33 @@ inline int hw_queue_tx::fill_wqe_lso(xlio_ibv_send_wr *pswr, int data_len)
     return wqebbs;
 }

-void hw_queue_tx::store_current_wqe_prop(mem_buf_desc_t *buf, unsigned credits, xlio_ti *ti)
+void hw_queue_tx::store_current_wqe_prop(mem_buf_desc_t *buf, unsigned credits, xlio_ti *ti,
+                                         tcp_pcb *pcb /*=nullptr*/)
 {
-    m_sq_wqe_idx_to_prop[m_sq_wqe_hot_index] = sq_wqe_prop {
-        .buf = buf,
-        .credits = credits,
-        .ti = ti,
-        .next = m_sq_wqe_prop_last,
-    };
+    m_sq_wqe_idx_to_prop[m_sq_wqe_hot_index] = sq_wqe_prop {.buf = buf,
+                                                            .credits = credits,
+                                                            .ti = ti,
+                                                            .next = m_sq_wqe_prop_last,
+                                                            .pcb = pcb};
     m_sq_wqe_prop_last = &m_sq_wqe_idx_to_prop[m_sq_wqe_hot_index];
     if (ti) {
         ti->get();
     }
 }

+void hw_queue_tx::store_current_wqe_prop(mem_buf_desc_t *buf, unsigned credits,
+                                         xlio_send_attr &attr)
+{
+    store_current_wqe_prop(buf, credits, attr.tis, attr.pcb);
+}
+
 //! Send one RAW packet
-void hw_queue_tx::send_to_wire(xlio_ibv_send_wr *p_send_wqe, xlio_wr_tx_packet_attr attr,
-                               bool request_comp, xlio_tis *tis, unsigned credits)
+void hw_queue_tx::send_to_wire(xlio_ibv_send_wr *p_send_wqe, xlio_send_attr &attr,
+                               bool request_comp, unsigned credits)
 {
     struct xlio_mlx5_wqe_ctrl_seg *ctrl = nullptr;
     struct mlx5_wqe_eth_seg *eseg = nullptr;
-    uint32_t tisn = tis ? tis->get_tisn() : 0;
+    uint32_t tisn = attr.tis ? attr.tis->get_tisn() : 0;

     ctrl = (struct xlio_mlx5_wqe_ctrl_seg *)m_sq_wqe_hot;
     eseg = (struct mlx5_wqe_eth_seg *)((uint8_t *)m_sq_wqe_hot + sizeof(*ctrl));
@@ -866,10 +871,11 @@ void hw_queue_tx::send_to_wire(xlio_ibv_send_wr *p_send_wqe, xlio_wr_tx_packet_a
      */
     *((uint64_t *)eseg) = 0;
     eseg->rsvd2 = 0;
-    eseg->cs_flags = (uint8_t)(attr & (XLIO_TX_PACKET_L3_CSUM | XLIO_TX_PACKET_L4_CSUM) & 0xff);
+    eseg->cs_flags =
+        (uint8_t)(attr.flags & (XLIO_TX_PACKET_L3_CSUM | XLIO_TX_PACKET_L4_CSUM) & 0xff);

     /* Store buffer descriptor */
-    store_current_wqe_prop(reinterpret_cast<mem_buf_desc_t *>(p_send_wqe->wr_id), credits, tis);
+    store_current_wqe_prop(reinterpret_cast<mem_buf_desc_t *>(p_send_wqe->wr_id), credits, attr);

     /* Complete WQE */
     int wqebbs = fill_wqe(p_send_wqe);
@@ -1519,10 +1525,9 @@ void hw_queue_tx::trigger_completion_for_all_sent_packets()
             hwqtx_logdbg("No space in SQ to trigger completions with a post operation");
             return;
         }
-
-        send_to_wire(&send_wr,
-                     (xlio_wr_tx_packet_attr)(XLIO_TX_PACKET_L3_CSUM | XLIO_TX_PACKET_L4_CSUM),
-                     true, nullptr, credits);
+        xlio_send_attr send_attrs =
+            (xlio_wr_tx_packet_attr)(XLIO_TX_PACKET_L3_CSUM | XLIO_TX_PACKET_L4_CSUM);
+        send_to_wire(&send_wr, send_attrs, true, credits);
     }
 }

diff --git a/src/core/dev/hw_queue_tx.h b/src/core/dev/hw_queue_tx.h
index b99e1bccc..c6d6fdd38 100644
--- a/src/core/dev/hw_queue_tx.h
+++ b/src/core/dev/hw_queue_tx.h
@@ -41,6 +41,8 @@
 #include "dev/cq_mgr_tx.h"
 #include "dev/cq_mgr_rx.h"
 #include "dev/dm_mgr.h"
+#include "lwip/tcp.h"
+#include "proto/dst_entry.h"
 #include "proto/mem_buf_desc.h"
 #include "proto/xlio_lwip.h"
 #include "util/sg_array.h"
@@ -73,6 +75,7 @@ struct sq_wqe_prop {
     /* Transport interface (TIS/TIR) current WQE holds reference to.
      */
    xlio_ti *ti;
    struct sq_wqe_prop *next;
+   tcp_pcb *pcb;
 };
 // @class hw_queue_tx
@@ -90,8 +93,7 @@ class hw_queue_tx : public xlio_ti_owner {
     void up();
     void down();

-    void send_wqe(xlio_ibv_send_wr *p_send_wqe, xlio_wr_tx_packet_attr attr, xlio_tis *tis,
-                  unsigned credits);
+    void send_wqe(xlio_ibv_send_wr *p_send_wqe, xlio_send_attr &attr, unsigned credits);

     struct ibv_qp *get_ibv_qp() const { return m_mlx5_qp.qp; };

@@ -215,8 +217,8 @@ class hw_queue_tx : public xlio_ti_owner {
     void destroy_tis_cache();
     void put_tls_tis_in_cache(xlio_tis *tis);

-    void send_to_wire(xlio_ibv_send_wr *p_send_wqe, xlio_wr_tx_packet_attr attr, bool request_comp,
-                      xlio_tis *tis, unsigned credits);
+    void send_to_wire(xlio_ibv_send_wr *p_send_wqe, xlio_send_attr &attr, bool request_comp,
+                      unsigned credits);

     void set_unsignaled_count(void) { m_n_unsignaled_count = m_n_sysvar_tx_num_wr_to_signal - 1; }

@@ -260,7 +262,11 @@ class hw_queue_tx : public xlio_ti_owner {
     inline void tls_get_progress_params_wqe(xlio_ti *ti, uint32_t tirn, void *buf, uint32_t lkey);
 #endif /* DEFINED_UTLS */

-    inline void store_current_wqe_prop(mem_buf_desc_t *wr_id, unsigned credits, xlio_ti *ti);
+    inline void store_current_wqe_prop(mem_buf_desc_t *wr_id, unsigned credits,
+                                       xlio_send_attr &attr);
+
+    inline void store_current_wqe_prop(mem_buf_desc_t *wr_id, unsigned credits, xlio_ti *ti,
+                                       tcp_pcb *pcb = nullptr);
     inline int fill_wqe(xlio_ibv_send_wr *p_send_wqe);
     inline int fill_wqe_send(xlio_ibv_send_wr *pswr);
     inline int fill_wqe_lso(xlio_ibv_send_wr *pswr, int data_len);
diff --git a/src/core/dev/ring.h b/src/core/dev/ring.h
index a707885e0..d9b046702 100644
--- a/src/core/dev/ring.h
+++ b/src/core/dev/ring.h
@@ -50,6 +50,7 @@ struct xlio_tls_info;
 class sockinfo;
 class rfs_rule;
 class poll_group;
+struct xlio_send_attr;

 #define ring_logpanic __log_info_panic
 #define ring_logerr   __log_info_err
@@ -101,7 +102,7 @@ class ring {
     virtual void send_ring_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
                                   xlio_wr_tx_packet_attr attr) = 0;
     virtual int send_lwip_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
-                                 xlio_wr_tx_packet_attr attr, xlio_tis *tis) = 0;
+                                 xlio_send_attr &attr) = 0;
     virtual int get_num_resources() const = 0;
     virtual int *get_rx_channel_fds(size_t &length) const
@@ -233,7 +234,7 @@ class ring {
     };
     virtual int get_supported_nvme_feature_mask() const { return 0; }

-    virtual void post_nop_fence(void) {}
+    virtual void post_nop_fence(void) { }

     virtual void post_dump_wqe(xlio_tis *tis, void *addr, uint32_t len, uint32_t lkey, bool first)
     {
         NOT_IN_USE(tis);
diff --git a/src/core/dev/ring_bond.cpp b/src/core/dev/ring_bond.cpp
index ccb57a285..095e6b087 100644
--- a/src/core/dev/ring_bond.cpp
+++ b/src/core/dev/ring_bond.cpp
@@ -444,14 +444,14 @@ void ring_bond::send_ring_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe
 }

 int ring_bond::send_lwip_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
-                                xlio_wr_tx_packet_attr attr, xlio_tis *tis)
+                                xlio_send_attr &attr)
 {
     mem_buf_desc_t *p_mem_buf_desc = (mem_buf_desc_t *)(p_send_wqe->wr_id);

     std::lock_guard lock(m_lock_ring_tx);
     if (is_active_member(p_mem_buf_desc->p_desc_owner, id)) {
-        return m_xmit_rings[id]->send_lwip_buffer(id, p_send_wqe, attr, tis);
+        return m_xmit_rings[id]->send_lwip_buffer(id, p_send_wqe, attr);
     }

     ring_logfunc("active ring=%p, silent packet drop (%p), (HA event?)", m_xmit_rings[id],
diff --git a/src/core/dev/ring_bond.h b/src/core/dev/ring_bond.h
index bd4807660..9e5e9e8ec 100644
--- a/src/core/dev/ring_bond.h
+++ b/src/core/dev/ring_bond.h
@@ -83,7 +83,7 @@ class ring_bond : public ring {
     virtual void send_ring_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
                                   xlio_wr_tx_packet_attr attr);
     virtual int send_lwip_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
-                                 xlio_wr_tx_packet_attr attr, xlio_tis *tis);
+                                 xlio_send_attr &attr);
     virtual void mem_buf_desc_return_single_to_owner_tx(mem_buf_desc_t *p_mem_buf_desc);
     virtual void mem_buf_desc_return_single_multi_ref(mem_buf_desc_t *p_mem_buf_desc, unsigned ref);
     virtual bool is_member(ring_slave *rng);
diff --git a/src/core/dev/ring_simple.cpp b/src/core/dev/ring_simple.cpp
index ecc77960a..f90c637d4 100644
--- a/src/core/dev/ring_simple.cpp
+++ b/src/core/dev/ring_simple.cpp
@@ -35,6 +35,7 @@
 #include

 #include "ring_simple.h"
+#include "proto/dst_entry.h"
 #include "util/valgrind.h"
 #include "util/sg_array.h"
 #include "sock/fd_collection.h"
@@ -716,15 +717,15 @@ void ring_simple::mem_buf_rx_release(mem_buf_desc_t *p_mem_buf_desc)
 }

 /* note that this function is inline, so keep it above the functions using it */
-inline int ring_simple::send_buffer(xlio_ibv_send_wr *p_send_wqe, xlio_wr_tx_packet_attr attr,
-                                    xlio_tis *tis)
+inline int ring_simple::send_buffer(xlio_ibv_send_wr *p_send_wqe, xlio_send_attr &attr)
 {
     int ret = 0;
     unsigned credits = m_hqtx->credits_calculate(p_send_wqe);

     if (likely(m_hqtx->credits_get(credits)) ||
-        is_available_qp_wr(is_set(attr, XLIO_TX_PACKET_BLOCK), credits)) {
-        m_hqtx->send_wqe(p_send_wqe, attr, tis, credits);
+        is_available_qp_wr(is_set(attr.flags, XLIO_TX_PACKET_BLOCK), credits)) {
+        xlio_send_attr send_attr = attr;
+        m_hqtx->send_wqe(p_send_wqe, send_attr, credits);
     } else {
         ring_logdbg("Silent packet drop, SQ is full!");
         ret = -1;
@@ -754,16 +755,17 @@ void ring_simple::send_ring_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_w
     }

     std::lock_guard lock(m_lock_ring_tx);
-    int ret = send_buffer(p_send_wqe, attr, nullptr);
+    xlio_send_attr send_attr = attr;
+    int ret = send_buffer(p_send_wqe, send_attr);
     send_status_handler(ret, p_send_wqe);
 }

 int ring_simple::send_lwip_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
-                                  xlio_wr_tx_packet_attr attr, xlio_tis *tis)
+                                  xlio_send_attr &attr)
 {
     NOT_IN_USE(id);
     std::lock_guard lock(m_lock_ring_tx);
-    int ret = send_buffer(p_send_wqe, attr, tis);
+    int ret = send_buffer(p_send_wqe, attr);
     send_status_handler(ret, p_send_wqe);
     return ret;
 }
diff --git a/src/core/dev/ring_simple.h b/src/core/dev/ring_simple.h
index 862087cbe..6e4b60d2c 100644
--- a/src/core/dev/ring_simple.h
+++ b/src/core/dev/ring_simple.h
@@ -83,8 +83,7 @@ class ring_simple : public ring_slave {
     void mem_buf_desc_return_to_owner_tx(mem_buf_desc_t *p_mem_buf_desc);
     void mem_buf_desc_return_to_owner_rx(mem_buf_desc_t *p_mem_buf_desc,
                                          void *pv_fd_ready_array = nullptr);
-    inline int send_buffer(xlio_ibv_send_wr *p_send_wqe, xlio_wr_tx_packet_attr attr,
-                           xlio_tis *tis);
+    inline int send_buffer(xlio_ibv_send_wr *p_send_wqe, xlio_send_attr &attr);
     bool is_up() override;
     void start_active_queue_tx();
     void start_active_queue_rx();
@@ -97,7 +96,7 @@ class ring_simple : public ring_slave {
     void send_ring_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
                           xlio_wr_tx_packet_attr attr) override;
     int send_lwip_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
-                         xlio_wr_tx_packet_attr attr, xlio_tis *tis) override;
+                         xlio_send_attr &attr) override;
     void mem_buf_desc_return_single_to_owner_tx(mem_buf_desc_t *p_mem_buf_desc) override;
     void mem_buf_desc_return_single_multi_ref(mem_buf_desc_t *p_mem_buf_desc,
                                               unsigned ref) override;
diff --git a/src/core/dev/ring_tap.cpp b/src/core/dev/ring_tap.cpp
index 0e569e620..b16e79194 100644
--- a/src/core/dev/ring_tap.cpp
+++ b/src/core/dev/ring_tap.cpp
@@ -35,6 +35,7 @@
 #include "ring_tap.h"

 #include
+#include "proto/dst_entry.h"
 #include "util/sg_array.h"
 #include "sock/fd_collection.h"
 #include "dev/net_device_table_mgr.h"
@@ -341,17 +342,17 @@ void ring_tap::send_ring_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
                         attr & XLIO_TX_PACKET_L4_CSUM);

     std::lock_guard lock(m_lock_ring_tx);
-    int ret = send_buffer(p_send_wqe, attr);
+    xlio_send_attr send_attr = attr;
+    int ret = send_buffer(p_send_wqe, send_attr);
     send_status_handler(ret, p_send_wqe);
 }

 int ring_tap::send_lwip_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
-                               xlio_wr_tx_packet_attr attr, xlio_tis *tis)
+                               xlio_send_attr &attr)
 {
     NOT_IN_USE(id);
-    NOT_IN_USE(tis);
-    compute_tx_checksum((mem_buf_desc_t *)(p_send_wqe->wr_id), attr & XLIO_TX_PACKET_L3_CSUM,
-                        attr & XLIO_TX_PACKET_L4_CSUM);
+    compute_tx_checksum((mem_buf_desc_t *)(p_send_wqe->wr_id), attr.flags & XLIO_TX_PACKET_L3_CSUM,
+                        attr.flags & XLIO_TX_PACKET_L4_CSUM);

     std::lock_guard lock(m_lock_ring_tx);
     int ret = send_buffer(p_send_wqe, attr);
@@ -597,7 +598,7 @@ int ring_tap::mem_buf_tx_release(mem_buf_desc_t *buff_list, bool b_accounting, b
     return count;
 }

-int ring_tap::send_buffer(xlio_ibv_send_wr *wr, xlio_wr_tx_packet_attr attr)
+int ring_tap::send_buffer(xlio_ibv_send_wr *wr, xlio_send_attr &attr)
 {
     int ret = 0;
     iovec iovec[wr->num_sge];
diff --git a/src/core/dev/ring_tap.h b/src/core/dev/ring_tap.h
index 011e224b9..0f962d333 100644
--- a/src/core/dev/ring_tap.h
+++ b/src/core/dev/ring_tap.h
@@ -66,7 +66,7 @@ class ring_tap : public ring_slave {
     virtual void send_ring_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
                                   xlio_wr_tx_packet_attr attr);
     virtual int send_lwip_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe,
-                                 xlio_wr_tx_packet_attr attr, xlio_tis *tis);
+                                 xlio_send_attr &attr);
     virtual void mem_buf_desc_return_single_to_owner_tx(mem_buf_desc_t *p_mem_buf_desc);
     virtual void mem_buf_desc_return_single_multi_ref(mem_buf_desc_t *p_mem_buf_desc, unsigned ref);
     virtual mem_buf_desc_t *mem_buf_tx_get(ring_user_id_t id, bool b_block, pbuf_type type,
@@ -85,7 +85,7 @@ class ring_tap : public ring_slave {
         NOT_IN_USE(poll_sn);
         return 0;
     }
-    virtual void adapt_cq_moderation() {}
+    virtual void adapt_cq_moderation() { }

     virtual int socketxtreme_poll(struct xlio_socketxtreme_completion_t *xlio_completions,
                                   unsigned int ncompletions, int flags)
@@ -101,7 +101,7 @@ class ring_tap : public ring_slave {
         NOT_IN_USE(rate_limit);
         return 0;
     }
-    void inc_cq_moderation_stats() {}
+    void inc_cq_moderation_stats() { }
     virtual uint32_t get_tx_user_lkey(void *addr, size_t length)
     {
         NOT_IN_USE(addr);
@@ -134,7 +134,7 @@ class ring_tap : public ring_slave {
     int prepare_flow_message(xlio_msg_flow &data, msg_flow_t flow_action);
     int process_element_rx(void *pv_fd_ready_array);
     bool request_more_rx_buffers();
-    int send_buffer(xlio_ibv_send_wr *p_send_wqe, xlio_wr_tx_packet_attr attr);
+    int send_buffer(xlio_ibv_send_wr *p_send_wqe, xlio_send_attr &attr);
     void send_status_handler(int ret, xlio_ibv_send_wr *p_send_wqe);
     void tap_create(net_device_val *p_ndev);
     void tap_destroy();
diff --git a/src/core/proto/dst_entry.h b/src/core/proto/dst_entry.h
index 9445a8e99..654b62c9d 100644
--- a/src/core/proto/dst_entry.h
+++ b/src/core/proto/dst_entry.h
@@ -40,6 +40,7 @@
 #include "core/util/if.h"
 #include
+#include "lwip/tcp.h" #include "vlogger/vlogger.h" #include "utils/lock_wrapper.h" #include "core/proto/route_entry.h" @@ -80,6 +81,27 @@ struct xlio_send_attr { uint16_t mss; size_t length; xlio_tis *tis; + tcp_pcb *pcb; + + // converting constructor + xlio_send_attr(const xlio_wr_tx_packet_attr &packet_attr) + : flags(packet_attr) + , mss(0) + , length(0) + , tis(nullptr) + , pcb(nullptr) + { + } + + xlio_send_attr(xlio_wr_tx_packet_attr other_flags, uint16_t other_mss, size_t other_length, + xlio_tis *other_tis, tcp_pcb *other_pcb) + : flags(other_flags) + , mss(other_mss) + , length(other_length) + , tis(other_tis) + , pcb(other_pcb) + { + } }; class dst_entry : public cache_observer, public tostr { @@ -93,8 +115,8 @@ class dst_entry : public cache_observer, public tostr { virtual void notify_cb(event *ev); virtual bool prepare_to_send(struct xlio_rate_limit_t &rate_limit, bool skip_rules = false); - virtual ssize_t fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr) = 0; - virtual ssize_t slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr, + virtual ssize_t fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr) = 0; + virtual ssize_t slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr, struct xlio_rate_limit_t &rate_limit, int flags = 0, sockinfo *sock = nullptr, tx_call_t call_type = TX_UNDEF) = 0; diff --git a/src/core/proto/dst_entry_tcp.cpp b/src/core/proto/dst_entry_tcp.cpp index 73753ef54..84e980686 100644 --- a/src/core/proto/dst_entry_tcp.cpp +++ b/src/core/proto/dst_entry_tcp.cpp @@ -67,7 +67,7 @@ transport_t dst_entry_tcp::get_transport(const sock_addr &to) return TRANS_XLIO; } -ssize_t dst_entry_tcp::fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr) +ssize_t dst_entry_tcp::fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr) { int ret = 0; void *p_pkt; @@ -255,7 +255,7 @@ ssize_t dst_entry_tcp::fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_ } } - ret = send_lwip_buffer(m_id, p_send_wqe, attr.flags, attr.tis); + ret = send_lwip_buffer(m_id, p_send_wqe, attr); } else { // We don'nt support inline in this case, since we believe that this a very rare case mem_buf_desc_t *p_mem_buf_desc; size_t total_packet_len = 0; @@ -315,7 +315,7 @@ ssize_t dst_entry_tcp::fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_ return ret; } -ssize_t dst_entry_tcp::slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr, +ssize_t dst_entry_tcp::slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr, struct xlio_rate_limit_t &rate_limit, int flags /*= 0*/, sockinfo *sock /*= 0*/, tx_call_t call_type /*= 0*/) { diff --git a/src/core/proto/dst_entry_tcp.h b/src/core/proto/dst_entry_tcp.h index 7b2c846e7..2b0c51fb3 100644 --- a/src/core/proto/dst_entry_tcp.h +++ b/src/core/proto/dst_entry_tcp.h @@ -50,8 +50,8 @@ class dst_entry_tcp : public dst_entry { resource_allocation_key &ring_alloc_logic); virtual ~dst_entry_tcp(); - ssize_t fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr); - ssize_t slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr, + ssize_t fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr); + ssize_t slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr, struct xlio_rate_limit_t &rate_limit, int flags = 0, sockinfo *sock = nullptr, tx_call_t call_type = TX_UNDEF); ssize_t slow_send_neigh(const iovec *p_iov, size_t sz_iov, @@ 
-77,13 +77,13 @@ class dst_entry_tcp : public dst_entry { uint64_t m_user_huge_page_mask; inline int send_lwip_buffer(ring_user_id_t id, xlio_ibv_send_wr *p_send_wqe, - xlio_wr_tx_packet_attr attr, xlio_tis *tis) + xlio_send_attr &attr) { - if (unlikely(is_set(attr, XLIO_TX_PACKET_DUMMY))) { + if (unlikely(is_set(attr.flags, XLIO_TX_PACKET_DUMMY))) { if (m_p_ring->get_hw_dummy_send_support(id, p_send_wqe)) { xlio_ibv_wr_opcode last_opcode = m_p_send_wqe_handler->set_opcode(*p_send_wqe, XLIO_IBV_WR_NOP); - m_p_ring->send_lwip_buffer(id, p_send_wqe, attr, tis); + m_p_ring->send_lwip_buffer(id, p_send_wqe, attr); m_p_send_wqe_handler->set_opcode(*p_send_wqe, last_opcode); } /* no need to free the buffer if dummy send is not supported, as for lwip buffers we @@ -94,7 +94,7 @@ class dst_entry_tcp : public dst_entry { return 0; } - return m_p_ring->send_lwip_buffer(id, p_send_wqe, attr, tis); + return m_p_ring->send_lwip_buffer(id, p_send_wqe, attr); } }; diff --git a/src/core/proto/dst_entry_udp.cpp b/src/core/proto/dst_entry_udp.cpp index 9d4418aa4..20e808586 100644 --- a/src/core/proto/dst_entry_udp.cpp +++ b/src/core/proto/dst_entry_udp.cpp @@ -476,7 +476,7 @@ ssize_t dst_entry_udp::fast_send_fragmented(const iovec *p_iov, const ssize_t sz return sz_data_payload; } -ssize_t dst_entry_udp::fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr) +ssize_t dst_entry_udp::fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr) { /* Suppress flags that should not be used anymore * to avoid conflicts with XLIO_TX_PACKET_L3_CSUM and XLIO_TX_PACKET_L4_CSUM @@ -495,7 +495,7 @@ ssize_t dst_entry_udp::fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_ } } -ssize_t dst_entry_udp::slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr, +ssize_t dst_entry_udp::slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr, struct xlio_rate_limit_t &rate_limit, int flags /*= 0*/, sockinfo *sock /*= 0*/, tx_call_t call_type /*= 0*/) { diff --git a/src/core/proto/dst_entry_udp.h b/src/core/proto/dst_entry_udp.h index 5da708d29..c7980e919 100644 --- a/src/core/proto/dst_entry_udp.h +++ b/src/core/proto/dst_entry_udp.h @@ -43,8 +43,8 @@ class dst_entry_udp : public dst_entry { resource_allocation_key &ring_alloc_logic); virtual ~dst_entry_udp(); - ssize_t fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr); - ssize_t slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr, + ssize_t fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr); + ssize_t slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr &attr, struct xlio_rate_limit_t &rate_limit, int flags = 0, sockinfo *sock = nullptr, tx_call_t call_type = TX_UNDEF); static bool fast_send_fragmented_ipv6(mem_buf_desc_t *p_mem_buf_desc, const iovec *p_iov, diff --git a/src/core/sock/sockinfo_tcp.cpp b/src/core/sock/sockinfo_tcp.cpp index 77ac38ec6..b9272f410 100644 --- a/src/core/sock/sockinfo_tcp.cpp +++ b/src/core/sock/sockinfo_tcp.cpp @@ -1169,6 +1169,7 @@ ssize_t sockinfo_tcp::tcp_tx(xlio_tx_call_attr_t &tx_arg) */ ssize_t sockinfo_tcp::tcp_tx_slow_path(xlio_tx_call_attr_t &tx_arg) { + // TODO - call tcp_close(pcb) if you identify the error iovec *p_iov = tx_arg.attr.iov; size_t sz_iov = tx_arg.attr.sz_iov; int flags = tx_arg.attr.flags; @@ -1368,7 +1369,7 @@ err_t sockinfo_tcp::ip_output(struct pbuf *p, struct tcp_seg *seg, void *v_p_con tcp_iovec lwip_iovec[max_count]; xlio_send_attr attr = { 
         (xlio_wr_tx_packet_attr)(flags | (!!p_si_tcp->is_xlio_socket() * XLIO_TX_SKIP_POLL)),
-        p_si_tcp->m_pcb.mss, 0, nullptr};
+        p_si_tcp->m_pcb.mss, 0, nullptr, (tcp_pcb *)v_p_conn};
     int count = 0;
     void *cur_end;
diff --git a/src/core/sock/sockinfo_udp.cpp b/src/core/sock/sockinfo_udp.cpp
index cced4077e..f47c04cb0 100644
--- a/src/core/sock/sockinfo_udp.cpp
+++ b/src/core/sock/sockinfo_udp.cpp
@@ -2165,7 +2165,7 @@ ssize_t sockinfo_udp::tx(xlio_tx_call_attr_t &tx_arg)
     }

     {
-        xlio_send_attr attr = {(xlio_wr_tx_packet_attr)0, 0, 0, nullptr};
+        xlio_send_attr attr = (xlio_wr_tx_packet_attr)0;
         bool b_blocking = m_b_blocking;
         if (unlikely(__flags & MSG_DONTWAIT)) {
             b_blocking = false;
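
For readers of the patch, the standalone sketch below illustrates the error-propagation idea it introduces: the TX path now remembers the sending tcp_pcb per WQE (via the extended sq_wqe_prop / xlio_send_attr), and the TX CQ error branch uses that pointer to raise an error event on the socket unless the connection is already going down. All type and function names here (tcp_state_t, on_error_cqe, the errf signature) are simplified stand-ins, not the real XLIO/lwip definitions.

// error_propagation_sketch.cpp -- illustration only, not part of the patch.
#include <cstdio>

enum tcp_state_t { ESTABLISHED, CLOSING, CLOSED };
enum err_t { ERR_OK = 0, ERR_RST = -14 };

struct tcp_pcb {
    tcp_state_t state;
    void *my_container;                       // owning socket object
    void (*errf)(void *container, err_t err); // error callback (TCP_EVENT_ERR in lwip)
};

// Per-WQE property record: the patch adds a pcb pointer next to buf/credits/ti
// so a TX completion error can be traced back to the socket that posted it.
struct sq_wqe_prop {
    tcp_pcb *pcb;
};

// Rough shape of the new cq_mgr_tx error branch: on an error CQE, look up the
// pcb stored for the failed WQE and raise an error event (effectively a TCP RST)
// unless the connection is already closing or closed.
static void on_error_cqe(const sq_wqe_prop &prop)
{
    tcp_pcb *pcb = prop.pcb;
    if (pcb && pcb->state != CLOSING && pcb->state != CLOSED) {
        pcb->errf(pcb->my_container, ERR_RST);
    }
}

int main()
{
    tcp_pcb pcb {ESTABLISHED, nullptr,
                 [](void *, err_t err) { std::printf("socket error event: %d\n", (int)err); }};
    sq_wqe_prop prop {&pcb};
    on_error_cqe(prop); // stands in for the TCP_EVENT_ERR(...) call in the patch
    return 0;
}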