diff --git a/src/interface/comm.rs b/src/interface/comm.rs index 392cdeceb..d7ecd24c9 100644 --- a/src/interface/comm.rs +++ b/src/interface/comm.rs @@ -2,7 +2,8 @@ // // // // -use std::mem::size_of; +use crate::interface; +use std::mem::{size_of}; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::fs::read_to_string; use std::str::from_utf8; @@ -206,7 +207,7 @@ pub struct SockaddrV6 { #[derive(Debug)] pub struct Socket { pub refcnt: i32, - raw_sys_fd: i32 + pub raw_sys_fd: i32 } impl Socket { @@ -340,8 +341,8 @@ impl Socket { pub fn setsockopt(&self, level: i32, optname: i32, optval: i32) -> i32 { let valbuf = optval; - let sor = unsafe{libc::setsockopt(self.raw_sys_fd, level, optname, (&valbuf as *const i32).cast::(), size_of::() as u32)}; - sor + let ret = unsafe{libc::setsockopt(self.raw_sys_fd, level, optname, (&valbuf as *const i32).cast::(), size_of::() as u32)}; + ret } pub fn shutdown(&self, how: i32) -> i32 { @@ -366,3 +367,95 @@ impl Drop for Socket { pub fn getifaddrs_from_file() -> String { read_to_string(NET_DEV_FILENAME).expect("No net_devices file present!").to_owned() } + +// Implementations of select related FD_SET structure +pub struct FdSet(libc::fd_set); + +impl FdSet { + pub fn new() -> FdSet { + unsafe { + let mut raw_fd_set = std::mem::MaybeUninit::::uninit(); + libc::FD_ZERO(raw_fd_set.as_mut_ptr()); + FdSet(raw_fd_set.assume_init()) + } + } + + pub fn new_from_ptr(raw_fdset_ptr: *const libc::fd_set) -> &'static mut FdSet { + unsafe { + &mut *(raw_fdset_ptr as *mut FdSet) + } + } + + // copy the src FdSet into self + pub fn copy_from(&mut self, src_fds: &FdSet) { + unsafe { + std::ptr::copy_nonoverlapping(&src_fds.0 as *const libc::fd_set, &mut self.0 as *mut libc::fd_set, 1); + } + } + + // turn off the fd bit in fd_set (currently only used by the tests) + #[allow(dead_code)] + pub fn clear(&mut self, fd: i32) { + unsafe { libc::FD_CLR(fd, &mut self.0) } + } + + // turn on the fd bit in fd_set + pub fn set(&mut self, fd: 
i32) { + unsafe { libc::FD_SET(fd, &mut self.0) } + } + + // return true if the bit for fd is set, false otherwise + pub fn is_set(&self, fd: i32) -> bool { + unsafe { libc::FD_ISSET(fd, &self.0) } + } + + pub fn is_empty(&self) -> bool { + let fd_array: &[u8] = unsafe { + std::slice::from_raw_parts(&self.0 as *const _ as *const u8, size_of::()) + }; + fd_array.iter().all(|&byte| byte == 0) + } + + // for each fd, if kernel_fds turned it on, then self will turn the corresponding translated fd on + pub fn set_from_kernelfds_and_translate(&mut self, kernel_fds: &FdSet, nfds: i32, rawfd_lindfd_tuples: &Vec<(i32, i32)>) { + for fd in 0..nfds { + if !kernel_fds.is_set(fd) { + continue; + } + // translate and set + if let Some((_, lindfd)) = rawfd_lindfd_tuples.iter().find(|(rawfd, _)| *rawfd == fd) { + self.set(*lindfd); + } + } + } +} + +// for unwrapping in kernel_select +fn to_fdset_ptr(opt: Option<&mut FdSet>) -> *mut libc::fd_set { + match opt { + None => std::ptr::null_mut(), + Some(&mut FdSet(ref mut raw_fd_set)) => raw_fd_set, + } +} + +pub fn kernel_select(nfds: libc::c_int, readfds: Option<&mut FdSet>, writefds: Option<&mut FdSet>, errorfds: Option<&mut FdSet>) -> i32 { + // Call libc::select and store the result + let result = unsafe { + // Create a timeval struct with zero timeout + + let mut kselect_timeout = libc::timeval { + tv_sec: 0, // 0 seconds + tv_usec: 0, // 0 microseconds + }; + + libc::select( + nfds, + to_fdset_ptr(readfds), + to_fdset_ptr(writefds), + to_fdset_ptr(errorfds), + &mut kselect_timeout as *mut libc::timeval, + ) + }; + + return result; +} diff --git a/src/interface/types.rs b/src/interface/types.rs index 5eec9d682..2f78c0814 100644 --- a/src/interface/types.rs +++ b/src/interface/types.rs @@ -192,6 +192,7 @@ pub union Arg { pub dispatch_constsigsett: *const SigsetType, pub dispatch_structitimerval: *mut ITimerVal, pub dispatch_conststructitimerval: *const ITimerVal, + pub dispatch_fdset: *mut libc::fd_set, } @@ -271,6 +272,15 @@ pub 
fn get_mutcbuf_null(union_argument: Arg) -> Result, i32> { return Ok(None); } +pub fn get_fdset(union_argument: Arg) -> Result, i32> { + let data: *mut libc::fd_set = unsafe{union_argument.dispatch_fdset}; + if !data.is_null() { + let internal_fds: &mut interface::FdSet = interface::FdSet::new_from_ptr(data); + return Ok(Some(internal_fds)); + } + return Ok(None); +} + pub fn get_cstr<'a>(union_argument: Arg) -> Result<&'a str, i32> { //first we check that the pointer is not null @@ -412,49 +422,6 @@ pub fn get_sockpair<'a>(union_argument: Arg) -> Result<&'a mut SockPair, i32> { return Err(syscall_error(Errno::EFAULT, "dispatcher", "input data not valid")); } -// turn on the fd bit in fd_set -pub fn fd_set_insert(fd_set: *mut u8, fd: i32) { - let byte_offset = fd / 8; - let byte_ptr = fd_set.wrapping_offset(byte_offset as isize); - let bit_offset = fd & 0b111; - unsafe{*byte_ptr |= 1 << bit_offset;} -} - -// turn off the fd bit in fd_set -pub fn fd_set_remove(fd_set: *mut u8, fd: i32) { - let byte_offset = fd / 8; - let byte_ptr = fd_set.wrapping_offset(byte_offset as isize); - let bit_offset = fd & 0b111; - unsafe{*byte_ptr &= !(1 << bit_offset);} -} - -// return true if the bit for fd is set in fd_set, false otherwise -pub fn fd_set_check_fd(fd_set: *const u8, fd: i32) -> bool { - let byte_offset = fd / 8; - let byte_ptr = fd_set.wrapping_offset(byte_offset as isize); - let bit_offset = fd & 0b111; - return (unsafe{*byte_ptr}) & (1 << bit_offset) != 0; -} - -pub fn fd_set_copy(src_set: *const u8, dst_set: *mut u8, nfds: i32) { - for fd in 0..nfds { - if interface::fd_set_check_fd(src_set, fd) { - interface::fd_set_insert(dst_set, fd); - } else { - interface::fd_set_remove(dst_set, fd); - } - } -} - -pub fn fd_set_is_empty(fd_set: *const u8, highest_fd: i32) -> bool { - for fd in 0..highest_fd + 1 { - if fd_set_check_fd(fd_set, fd) { - return false; - } - } - return true; -} - pub fn get_sockaddr(union_argument: Arg, addrlen: u32) -> Result { let pointer = 
unsafe{union_argument.dispatch_constsockaddrstruct}; if !pointer.is_null() { diff --git a/src/safeposix/cage.rs b/src/safeposix/cage.rs index e45816b31..1b88901c8 100644 --- a/src/safeposix/cage.rs +++ b/src/safeposix/cage.rs @@ -40,6 +40,8 @@ pub struct StreamDesc { #[derive(Debug, Clone)] pub struct SocketDesc { pub flags: i32, + pub domain: i32, + pub rawfd: i32, pub handle: interface::RustRfc>, pub advlock: interface::RustRfc, } diff --git a/src/safeposix/dispatcher.rs b/src/safeposix/dispatcher.rs index 19df48818..5a9970f28 100644 --- a/src/safeposix/dispatcher.rs +++ b/src/safeposix/dispatcher.rs @@ -386,7 +386,7 @@ pub extern "C" fn dispatcher(cageid: u64, callnum: i32, arg1: Arg, arg2: Arg, ar if nfds < 0 { //RLIMIT_NOFILE check as well? return syscall_error(Errno::EINVAL, "select", "The number of fds passed was invalid"); } - check_and_dispatch!(cage.select_syscall, Ok::(nfds), interface::get_mutcbuf_null(arg2), interface::get_mutcbuf_null(arg3), interface::get_mutcbuf_null(arg4), interface::duration_fromtimeval(arg5)) + check_and_dispatch!(cage.select_syscall, Ok::(nfds), interface::get_fdset(arg2), interface::get_fdset(arg3), interface::get_fdset(arg4), interface::duration_fromtimeval(arg5)) } POLL_SYSCALL => { let nfds = get_onearg!(interface::get_usize(arg2)); diff --git a/src/safeposix/net.rs b/src/safeposix/net.rs index 9f8bc2a15..344935eff 100644 --- a/src/safeposix/net.rs +++ b/src/safeposix/net.rs @@ -82,7 +82,8 @@ pub struct UnixSocketInfo { #[derive(Debug)] pub struct SocketHandle { pub innersocket: Option, - pub options: i32, + pub socket_options: i32, + pub tcp_options: i32, pub state: ConnState, pub protocol: i32, pub domain: i32, @@ -387,3 +388,33 @@ impl NetMetadata { domainsock_paths } } + +pub struct SelectInetInfo { + pub rawfd_lindfd_tuples: Vec<(i32, i32)>, + pub kernel_fds: interface::FdSet, + pub highest_raw_fd: i32, +} + +impl SelectInetInfo { + pub fn new() -> Self { + SelectInetInfo { + rawfd_lindfd_tuples: Vec::new(), + 
kernel_fds: interface::FdSet::new(), + highest_raw_fd: 0, + } + } +} + +pub fn update_readfds_from_kernel_select(readfds: &mut interface::FdSet, inet_info: &mut SelectInetInfo, retval: &mut i32) -> i32 { + let kernel_ret; + // note that this select call always has timeout = 0, so it doesn't block + + kernel_ret = interface::kernel_select(inet_info.highest_raw_fd + 1, Some(&mut inet_info.kernel_fds), None, None); + if kernel_ret > 0 { + // increment retval of our select + *retval += kernel_ret; + // translate the kernel checked fds to lindfds, and add to our readfds + readfds.set_from_kernelfds_and_translate(&mut inet_info.kernel_fds, inet_info.highest_raw_fd + 1, &inet_info.rawfd_lindfd_tuples); + } + return kernel_ret; +} diff --git a/src/safeposix/syscalls/net_calls.rs b/src/safeposix/syscalls/net_calls.rs index 10272531f..d59b19b8d 100644 --- a/src/safeposix/syscalls/net_calls.rs +++ b/src/safeposix/syscalls/net_calls.rs @@ -18,6 +18,8 @@ impl Cage { let sockfd = SocketDesc { flags: flags, + domain: domain, + rawfd: -1, // RawFD set in bind for inet, or stays at -1 for others handle: interface::RustRfc::new(interface::RustLock::new(Self::mksockhandle(domain, socktype, protocol, conn, flags))), advlock: interface::RustRfc::new(interface::AdvisoryLock::new()), }; //currently on failure to create handle we create successfully but it's corrupted, change? 
@@ -108,7 +110,7 @@ impl Cage { let thissock = interface::Socket::new(sockhandle.domain, sockhandle.socktype, sockhandle.protocol); for reuse in [SO_REUSEPORT, SO_REUSEADDR] { - if sockhandle.options & (1 << reuse) == 0 {continue;} + if sockhandle.socket_options & (1 << reuse) == 0 {continue;} let sockret = thissock.setsockopt(SOL_SOCKET, reuse, 1); if sockret < 0 { panic!("Cannot handle failure in setsockopt on socket creation"); @@ -192,7 +194,7 @@ impl Cage { fn bind_inner_socket_inet(&self, sockhandle: &mut SocketHandle, newsockaddr: &mut interface::GenSockaddr, prereserved: bool) -> i32 { // INET Sockets - let intent_to_rebind = sockhandle.options & (1 << SO_REUSEPORT) != 0; + let intent_to_rebind = sockhandle.socket_options & (1 << SO_REUSEPORT) != 0; Self::force_innersocket(sockhandle); let newlocalport = if prereserved { @@ -215,7 +217,6 @@ impl Cage { 0 } - pub fn bind_inner(&self, fd: i32, localaddr: &interface::GenSockaddr, prereserved: bool) -> i32 { let checkedfd = self.get_filedescriptor(fd).unwrap(); @@ -294,11 +295,12 @@ impl Cage { if remoteaddr.get_family() != sockhandle.domain as u16 { return syscall_error(Errno::EINVAL, "connect", "An address with an invalid family for the given domain was specified"); } + match sockhandle.protocol { - IPPROTO_UDP => return self.connect_udp(&mut *sockhandle, remoteaddr), + IPPROTO_UDP => return self.connect_udp(&mut *sockhandle, sockfdobj, remoteaddr), IPPROTO_TCP => return self.connect_tcp(&mut *sockhandle, sockfdobj, remoteaddr), _ => return syscall_error(Errno::EOPNOTSUPP, "connect", "Unknown protocol in connect"), - } + }; } _ => { return syscall_error(Errno::ENOTSOCK, "connect", "file descriptor refers to something other than a socket"); @@ -309,7 +311,7 @@ impl Cage { } } - fn connect_udp(&self, sockhandle: &mut SocketHandle, remoteaddr: &interface::GenSockaddr) -> i32 { + fn connect_udp(&self, sockhandle: &mut SocketHandle, sockfdobj: &mut SocketDesc, remoteaddr: &interface::GenSockaddr) -> i32 { //for 
UDP, just set the addresses and return //we don't need to check connection state for UDP, it's connectionless! sockhandle.remoteaddr = Some(remoteaddr.clone()); @@ -321,7 +323,10 @@ impl Cage { Err(e) => return e, }; - return self.bind_inner_socket(&mut *sockhandle, &localaddr, true); + let bindret = self.bind_inner_socket(&mut *sockhandle, &localaddr, true); + // udp now connected so lets set rawfd for select + sockfdobj.rawfd = sockhandle.innersocket.as_ref().unwrap().raw_sys_fd; + return bindret; } }; } @@ -334,7 +339,7 @@ impl Cage { match sockhandle.domain { AF_UNIX => self.connect_tcp_unix(&mut *sockhandle, sockfdobj, remoteaddr), - AF_INET | AF_INET6 => self.connect_tcp_inet(&mut *sockhandle, remoteaddr), + AF_INET | AF_INET6 => self.connect_tcp_inet(&mut *sockhandle, sockfdobj, remoteaddr), _ => {return syscall_error(Errno::EINVAL, "connect", "Unsupported domain provided")}, } } @@ -380,7 +385,7 @@ impl Cage { return 0; } - fn connect_tcp_inet(&self, sockhandle: &mut SocketHandle, remoteaddr: &interface::GenSockaddr) -> i32 { + fn connect_tcp_inet(&self, sockhandle: &mut SocketHandle, sockfdobj: &mut SocketDesc, remoteaddr: &interface::GenSockaddr) -> i32 { // TCP inet domain logic //for TCP, actually create the internal socket object and connect it let remoteclone = remoteaddr.clone(); @@ -422,6 +427,8 @@ impl Cage { sockhandle.state = ConnState::CONNECTED; sockhandle.remoteaddr = Some(remoteaddr.clone()); sockhandle.errno = 0; + // set the rawfd for select + sockfdobj.rawfd = sockhandle.innersocket.as_ref().unwrap().raw_sys_fd; if inprogress { sockhandle.state = ConnState::INPROGRESS; return syscall_error(Errno::EINPROGRESS, "connect", "The libc call to connect is in progress."); @@ -431,11 +438,12 @@ impl Cage { } } - fn mksockhandle(domain: i32, socktype: i32, protocol: i32, conn: ConnState, options: i32) -> SocketHandle { + fn mksockhandle(domain: i32, socktype: i32, protocol: i32, conn: ConnState, socket_options: i32) -> SocketHandle { SocketHandle { 
innersocket: None, - options: options, + socket_options: socket_options, + tcp_options: 0, state: conn, protocol: protocol, domain: domain, @@ -457,10 +465,10 @@ impl Cage { } let checkedfd = self.get_filedescriptor(fd).unwrap(); - let unlocked_fd = checkedfd.write(); - if let Some(filedesc_enum) = &*unlocked_fd { + let mut unlocked_fd = checkedfd.write(); + if let Some(filedesc_enum) = &mut *unlocked_fd { match filedesc_enum { - Socket(sockfdobj) => { + Socket(ref mut sockfdobj) => { let sock_tmp = sockfdobj.handle.clone(); let mut sockhandle = sock_tmp.write(); @@ -637,6 +645,11 @@ impl Cage { fn recv_common_inner_tcp(&self, sockhandle: &mut interface::RustLockWriteGuard, sockfdobj: &mut SocketDesc, buf: *mut u8, buflen: usize, flags: i32, addr: &mut Option<&mut interface::GenSockaddr>) -> i32 { + // maybe select reported a INPROGRESS tcp socket as readable, so re-check the state here + if sockhandle.state == ConnState::INPROGRESS && sockhandle.innersocket.as_ref().unwrap().check_rawconnection() { + sockhandle.state = ConnState::CONNECTED; + } + if (sockhandle.state != ConnState::CONNECTED) && (sockhandle.state != ConnState::CONNRDONLY) { return syscall_error(Errno::ENOTCONN, "recvfrom", "The descriptor is not connected"); } @@ -754,12 +767,13 @@ impl Cage { fn recv_common_inner_udp(&self, sockhandle: &mut interface::RustLockWriteGuard, sockfdobj: &mut SocketDesc, buf: *mut u8, buflen: usize, addr: &mut Option<&mut interface::GenSockaddr>) -> i32 { let binddomain = if let Some(baddr) = addr { baddr.get_family() as i32 - } else { AF_INET }; + } else { AF_INET }; - let ibindret = self._implicit_bind(&mut *sockhandle, binddomain); - if ibindret < 0 { - return ibindret; - } + let ibindret = self._implicit_bind(&mut *sockhandle, binddomain); + if ibindret < 0 { + return ibindret; + } + loop { // loop for blocking sockets //if the remoteaddr is set and addr is not, use remoteaddr @@ -851,7 +865,6 @@ impl Cage { Err(()) => panic!("Unknown errno value from socket bind 
within listen returned!"), }; } - } let ladr = sockhandle.localaddr.unwrap().clone(); //must have been populated by implicit bind @@ -870,6 +883,9 @@ impl Cage { sockhandle.state = ConnState::NOTCONNECTED; return lr; }; + + //set rawfd for select + sockfdobj.rawfd = sockhandle.innersocket.as_ref().unwrap().raw_sys_fd; if !NET_METADATA.pending_conn_table.contains_key(&porttuple) { NET_METADATA.pending_conn_table.insert(porttuple.clone(), vec![]); } @@ -1096,11 +1112,10 @@ impl Cage { newsockhandle.localaddr = Some(sockhandle.localaddr.unwrap().clone()); newsockhandle.remoteaddr = Some(remote_addr.clone()); newsockhandle.state = ConnState::CONNECTED; + let _insertval = newfdoption.insert(Socket(newsockfd)); *addr = remote_addr; //populate addr with what address it connected to - - return newfd; } _ => { @@ -1118,7 +1133,7 @@ impl Cage { if sockhandle.state != ConnState::LISTEN { return syscall_error(Errno::EINVAL, "accept", "Socket must be listening before accept is called"); } - let newsockfd = self._socket_initializer(sockhandle.domain, sockhandle.socktype, sockhandle.protocol, sockfdobj.flags & O_NONBLOCK != 0, sockfdobj.flags & O_CLOEXEC != 0, ConnState::CONNECTED); + let mut newsockfd = self._socket_initializer(sockhandle.domain, sockhandle.socktype, sockhandle.protocol, sockfdobj.flags & O_NONBLOCK != 0, sockfdobj.flags & O_CLOEXEC != 0, ConnState::CONNECTED); // if we got a pending connection in select/poll/whatever, return that here instead let ladr = sockhandle.localaddr.unwrap().clone(); //must have been populated by implicit bind @@ -1176,6 +1191,9 @@ impl Cage { //create socket object for new connected socket newsockhandle.innersocket = Some(acceptedsock); + // set lock-free rawfd for select + newsockfd.rawfd = newsockhandle.innersocket.as_ref().unwrap().raw_sys_fd; + let _insertval = newfdoption.insert(Socket(newsockfd)); *addr = remote_addr; //populate addr with what address it connected to @@ -1188,33 +1206,7 @@ impl Cage { } } - fn 
_nonblock_peek_read(&self, fd: i32) -> bool { - let flags = MSG_PEEK; - let mut buf = [0u8; 1]; - let bufptr = buf.as_mut_ptr(); - let checkedfd = self.get_filedescriptor(fd).unwrap(); - let mut unlocked_fd = checkedfd.write(); - if let Some(ref mut filedesc_enum) = &mut *unlocked_fd { - let oldflags; - if let Socket(ref mut sockfdobj) = filedesc_enum { - oldflags = sockfdobj.flags; - sockfdobj.flags |= O_NONBLOCK; - } else { - return false; - } - let retval = self.recv_common_inner(filedesc_enum, bufptr, 1, flags, &mut None); - if let Socket(ref mut sockfdobj) = filedesc_enum { - sockfdobj.flags = oldflags; - } else { - unreachable!(); - } - return retval >= 0; //it it's less than 0, it failed, it it's 0 peer is dead, 1 it succeeded, in the latter 2 it's true - } else { - return false; - } - } - - pub fn select_syscall(&self, nfds: i32, readfds: Option<*mut u8>, writefds: Option<*mut u8>, exceptfds: Option<*mut u8>, timeout: Option) -> i32 { + pub fn select_syscall(&self, nfds: i32, readfds: Option<&mut interface::FdSet>, writefds: Option<&mut interface::FdSet>, exceptfds: Option<&mut interface::FdSet>, timeout: Option) -> i32 { if nfds < STARTINGFD || nfds >= FD_SET_MAX_FD { return syscall_error(Errno::EINVAL, "select", "Number of FDs is wrong"); @@ -1229,32 +1221,32 @@ impl Cage { let mut retval = 0; // in the loop below, we always read from original fd_sets, but make updates to the new copies - let mut new_reads_chunk: [u8; (FD_SET_MAX_FD / 8) as usize] = [0; (FD_SET_MAX_FD / 8) as usize]; - let mut new_writes_chunk: [u8; (FD_SET_MAX_FD / 8) as usize] = [0; (FD_SET_MAX_FD / 8) as usize]; - let new_readfds = new_reads_chunk.as_mut_ptr(); - let new_writefds = new_writes_chunk.as_mut_ptr(); + let new_readfds = &mut interface::FdSet::new(); + let new_writefds = &mut interface::FdSet::new(); loop { //we must block manually // 1. 
iterate thru readfds - if readfds.is_some() { - let res = self.select_readfds(nfds, readfds.unwrap(), new_readfds, &mut retval); + if let Some(readfds_ref) = readfds.as_ref() { + let res = self.select_readfds(nfds, readfds_ref, new_readfds, &mut retval); if res != 0 {return res} } // 2. iterate thru writefds - if writefds.is_some() { - let res = self.select_writefds(nfds, writefds.unwrap(), new_writefds, &mut retval); + if let Some(writefds_ref) = writefds.as_ref() { + let res = self.select_writefds(nfds, writefds_ref, new_writefds, &mut retval); if res != 0 {return res} } // 3. iterate thru exceptfds // currently we don't really do select on execptfds, we just check if those fds are valid - if exceptfds.is_some() { + if let Some(exceptfds_ref) = exceptfds.as_ref() { for fd in 0..nfds { // find the bit and see if it's on - if !interface::fd_set_check_fd(exceptfds.unwrap(), fd) {continue} + if !exceptfds_ref.is_set(fd) { continue; } let checkedfd = self.get_filedescriptor(fd).unwrap(); let unlocked_fd = checkedfd.read(); - if let None = *unlocked_fd { return syscall_error(Errno::EBADF, "select", "invalid file descriptor"); } + if unlocked_fd.is_none() { + return syscall_error(Errno::EBADF, "select", "invalid file descriptor"); + } } } @@ -1263,37 +1255,40 @@ impl Cage { } else { // at this point lets check if we got a signal before sleeping if interface::sigcheck() { return syscall_error(Errno::EINTR, "select", "interrupted function call"); } - interface::lind_yield(); + interface::sleep(BLOCK_TIME); } } - // update the original fd_set bitmaps + // Now we copy our internal FdSet struct results back into the *mut libc::fd_set if readfds.is_some() { - interface::fd_set_copy(new_readfds, readfds.unwrap(), nfds); + readfds.unwrap().copy_from(&new_readfds); } if writefds.is_some() { - interface::fd_set_copy(new_writefds, writefds.unwrap(), nfds); + writefds.unwrap().copy_from(&new_writefds); } return retval; } - fn select_readfds(&self, nfds: i32, readfds: *mut u8, 
new_readfds: *mut u8, retval: &mut i32) -> i32 { + fn select_readfds(&self, nfds: i32, readfds: &interface::FdSet, new_readfds: &mut interface::FdSet, retval: &mut i32) -> i32 { + // For INET: prepare the data structures for the kernel_select's use + let mut inet_info = SelectInetInfo::new(); + for fd in 0..nfds { // check if current i is in readfd - if !interface::fd_set_check_fd(readfds, fd) {continue} + if !readfds.is_set(fd) {continue} let checkedfd = self.get_filedescriptor(fd).unwrap(); let unlocked_fd = checkedfd.read(); if let Some(filedesc_enum) = &*unlocked_fd { match filedesc_enum { Socket(ref sockfdobj) => { - let sock_tmp = sockfdobj.handle.clone(); - let sockhandle = sock_tmp.read(); let mut newconnection = false; - match sockhandle.domain { + match sockfdobj.domain { AF_UNIX => { + let sock_tmp = sockfdobj.handle.clone(); + let sockhandle = sock_tmp.read(); if sockhandle.state == ConnState::INPROGRESS { let remotepathbuf = normpath(convpath(sockhandle.remoteaddr.unwrap().path()), self); let dsconnobj = NET_METADATA.domsock_accept_table.get(&remotepathbuf); @@ -1305,71 +1300,35 @@ impl Cage { let dsconnobj = NET_METADATA.domsock_accept_table.get(&localpathbuf); if dsconnobj.is_some() { // we have a connecting domain socket, return as readable to be accepted - interface::fd_set_insert(new_readfds, fd); + new_readfds.set(fd); *retval += 1; } } else if sockhandle.state == ConnState::CONNECTED || newconnection { let sockinfo = &sockhandle.unix_info.as_ref().unwrap(); let receivepipe = sockinfo.receivepipe.as_ref().unwrap(); if receivepipe.check_select_read() { - interface::fd_set_insert(new_readfds, fd); + new_readfds.set(fd); *retval += 1; } } } AF_INET | AF_INET6 => { - if sockhandle.state == ConnState::LISTEN { - let ladr = sockhandle.localaddr.unwrap().clone(); //must have been populated by implicit bind - let porttuple = mux_port(ladr.addr().clone(), ladr.port(), sockhandle.domain, TCPPORT); - let mut pendingvec = 
NET_METADATA.pending_conn_table.get_mut(&porttuple).unwrap(); - - if pendingvec.is_empty() { - //innersock unwrap ok because sockhandle is listening - let listeningsocket = match sockhandle.domain { - PF_INET => sockhandle.innersocket.as_ref().unwrap().nonblock_accept(true), - PF_INET6 => sockhandle.innersocket.as_ref().unwrap().nonblock_accept(false), - _ => panic!("Unknown domain in accepting socket"), - }; - drop(sockhandle); - if let Ok(_) = listeningsocket.0 { - //save the new pending connection for accept to do something with it - pendingvec.push(listeningsocket); - } else { - // if it returned an error, then don't insert it into new_readfds - // of course unset the bit explicitly before we continue - interface::fd_set_remove(new_readfds, fd); - continue; - } - } // if we get here we have an existing connection - - //if we reach here there is a pending connection, either from a new or existing connection - interface::fd_set_insert(new_readfds, fd); - *retval += 1; - //sockhandle innersocket unwrap ok if INPROGRESS - } else if sockhandle.state == ConnState::INPROGRESS && sockhandle.innersocket.as_ref().unwrap().check_rawconnection() { - newconnection = true; - interface::fd_set_insert(new_readfds, fd); - *retval += 1; - } else { - if sockhandle.protocol == IPPROTO_UDP { - interface::fd_set_insert(new_readfds, fd); - *retval += 1; - } else { - drop(sockhandle); - drop(unlocked_fd); - if self._nonblock_peek_read(fd) { - interface::fd_set_insert(new_readfds, fd); - *retval += 1; - } - } + // here we simply record the inet fd into inet_fds and the tuple list for using kernel_select + if sockfdobj.rawfd < 0 { continue; } + + inet_info.kernel_fds.set(sockfdobj.rawfd); + inet_info.rawfd_lindfd_tuples.push((sockfdobj.rawfd, fd)); + if sockfdobj.rawfd > inet_info.highest_raw_fd { + inet_info.highest_raw_fd = sockfdobj.rawfd; } }, _ => {return syscall_error(Errno::EINVAL, "select", "Unsupported domain provided")} } if newconnection { - let mut newconnhandle = 
sock_tmp.write(); - newconnhandle.state = ConnState::CONNECTED; + let sock_tmp = sockfdobj.handle.clone(); + let mut sockhandle = sock_tmp.write(); + sockhandle.state = ConnState::CONNECTED; } } @@ -1378,14 +1337,14 @@ impl Cage { Pipe(pipefdobj) => { if pipefdobj.pipe.check_select_read() { - interface::fd_set_insert(new_readfds, fd); + new_readfds.set(fd); *retval += 1; } } //these file reads never block _ => { - interface::fd_set_insert(new_readfds, fd); + new_readfds.set(fd); *retval += 1; } } @@ -1393,13 +1352,23 @@ impl Cage { return syscall_error(Errno::EBADF, "select", "invalid file descriptor"); } } + + // do the kernel_select for inet sockets + if !inet_info.kernel_fds.is_empty() { + let kernel_ret = update_readfds_from_kernel_select(new_readfds, &mut inet_info, retval); + // NOTE: we ignore the kernel_select error if some domsocks are ready + if kernel_ret < 0 && *retval <= 0 { + return kernel_ret; + } + } + return 0; } - fn select_writefds(&self, nfds: i32, writefds: *mut u8, new_writefds: *mut u8, retval: &mut i32) -> i32 { + fn select_writefds(&self, nfds: i32, writefds: & interface::FdSet, new_writefds: &mut interface::FdSet, retval: &mut i32) -> i32 { for fd in 0..nfds { // check if current i is in writefds - if !interface::fd_set_check_fd(writefds, fd) {continue} + if !writefds.is_set(fd) {continue} let checkedfd = self.get_filedescriptor(fd).unwrap(); let unlocked_fd = checkedfd.read(); @@ -1430,26 +1399,26 @@ impl Cage { } //we always say sockets are writable? Even though this is not true - interface::fd_set_insert(new_writefds, fd); + new_writefds.set(fd); *retval += 1; } //we always say streams are writable? 
Stream(_) => { - interface::fd_set_insert(new_writefds, fd); + new_writefds.set(fd); *retval += 1; } Pipe(pipefdobj) => { if pipefdobj.pipe.check_select_write() { - interface::fd_set_insert(new_writefds, fd); + new_writefds.set(fd); *retval += 1; } } //these file writes never block _ => { - interface::fd_set_insert(new_writefds, fd); + new_writefds.set(fd); *retval += 1; } } @@ -1465,20 +1434,29 @@ impl Cage { let mut unlocked_fd = checkedfd.write(); if let Some(filedesc_enum) = &mut *unlocked_fd { if let Socket(ref mut sockfdobj) = filedesc_enum { - //checking that we recieved SOL_SOCKET + let optbit = 1 << optname; + let sock_tmp = sockfdobj.handle.clone(); + let mut sockhandle = sock_tmp.write(); match level { SOL_UDP => { return syscall_error(Errno::EOPNOTSUPP, "getsockopt", "UDP is not supported for getsockopt"); } SOL_TCP => { + // Checking the tcp_options here + // Currently only support TCP_NODELAY option for SOL_TCP + if optname == TCP_NODELAY { + let optbit = 1 << optname; + if optbit & sockhandle.tcp_options == optbit { + *optval = 1; + } else { + *optval = 0; + } + return 0; + } return syscall_error(Errno::EOPNOTSUPP, "getsockopt", "TCP options not remembered by getsockopt"); } SOL_SOCKET => { - let optbit = 1 << optname; - - let sock_tmp = sockfdobj.handle.clone(); - let mut sockhandle = sock_tmp.write(); - + // checking the socket_options here match optname { //indicate whether we are accepting connections or not in the moment SO_ACCEPTCONN => { @@ -1490,7 +1468,7 @@ impl Cage { } //if the option is a stored binary option, just return it... 
SO_LINGER | SO_KEEPALIVE | SO_SNDLOWAT | SO_RCVLOWAT | SO_REUSEPORT | SO_REUSEADDR => { - if sockhandle.options & optbit == optbit { + if sockhandle.socket_options & optbit == optbit { *optval = 1; } else { *optval = 0; @@ -1542,15 +1520,42 @@ impl Cage { //checking that we recieved SOL_SOCKET match level { SOL_UDP => { - return syscall_error(Errno::EOPNOTSUPP, "getsockopt", "UDP is not supported for getsockopt"); + return syscall_error(Errno::EOPNOTSUPP, "setsockopt", "UDP is not supported for getsockopt"); } SOL_TCP => { + // Here we check and set tcp_options + // Currently only support TCP_NODELAY for SOL_TCP if optname == TCP_NODELAY { - return 0; + let optbit = 1 << optname; + let sock_tmp = sockfdobj.handle.clone(); + let mut sockhandle = sock_tmp.write(); + let mut newoptions = sockhandle.tcp_options; + //now let's set this if we were told to + if optval != 0 { + //optval should always be 1 or 0. + newoptions |= optbit; + } else { + newoptions &= !optbit; + } + + if newoptions != sockhandle.tcp_options { + if let Some(sock) = sockhandle.innersocket.as_ref() { + let sockret = sock.setsockopt(SOL_TCP, optname, optval); + if sockret < 0 { + match Errno::from_discriminant(interface::get_errno()) { + Ok(i) => {return syscall_error(i, "setsockopt", "The libc call to setsockopt failed!");}, + Err(()) => panic!("Unknown errno value from setsockopt returned!"), + }; + } + } + } + sockhandle.tcp_options = newoptions; + return 0; } - return syscall_error(Errno::EOPNOTSUPP, "getsockopt", "TCP options not remembered by getsockopt"); + return syscall_error(Errno::EOPNOTSUPP, "setsockopt", "This TCP option is not remembered by setsockopt"); } SOL_SOCKET => { + // Here we check and set socket_options let optbit = 1 << optname; let sock_tmp = sockfdobj.handle.clone(); let mut sockhandle = sock_tmp.write(); @@ -1562,10 +1567,10 @@ impl Cage { } SO_LINGER | SO_KEEPALIVE => { if optval == 0 { - sockhandle.options &= !optbit; + sockhandle.socket_options &= !optbit; } else { 
//optval should always be 1 or 0. - sockhandle.options |= optbit; + sockhandle.socket_options |= optbit; } @@ -1573,7 +1578,7 @@ impl Cage { } SO_REUSEPORT | SO_REUSEADDR => { - let mut newoptions = sockhandle.options; + let mut newoptions = sockhandle.socket_options; //now let's set this if we were told to if optval != 0 { //optval should always be 1 or 0. @@ -1582,7 +1587,7 @@ impl Cage { newoptions &= !optbit; } - if newoptions != sockhandle.options { + if newoptions != sockhandle.socket_options { if let Some(sock) = sockhandle.innersocket.as_ref() { let sockret = sock.setsockopt(SOL_SOCKET, optname, optval); if sockret < 0 { @@ -1594,7 +1599,7 @@ impl Cage { } } - sockhandle.options = newoptions; + sockhandle.socket_options = newoptions; return 0; } @@ -1729,21 +1734,17 @@ impl Cage { let fd = structpoll.fd; let events = structpoll.events; - // allocate spaces for fd_set bitmaps - let mut reads_chunk: [u8; (FD_SET_MAX_FD / 8) as usize] = [0; (FD_SET_MAX_FD / 8) as usize]; - let mut writes_chunk: [u8; (FD_SET_MAX_FD / 8) as usize] = [0; (FD_SET_MAX_FD / 8) as usize]; - let mut errors_chunk: [u8; (FD_SET_MAX_FD / 8) as usize] = [0; (FD_SET_MAX_FD / 8) as usize]; - - let reads: *mut u8 = reads_chunk.as_mut_ptr(); - let writes: *mut u8 = writes_chunk.as_mut_ptr(); - let errors: *mut u8 = errors_chunk.as_mut_ptr(); + // init FdSet structures + let reads = &mut interface::FdSet::new(); + let writes = &mut interface::FdSet::new(); + let errors = &mut interface::FdSet::new(); //read - if events & POLLIN > 0 {interface::fd_set_insert(reads, fd)} + if events & POLLIN > 0 {reads.set(fd)} //write - if events & POLLOUT > 0 {interface::fd_set_insert(writes, fd)} + if events & POLLOUT > 0 {writes.set(fd)} //err - if events & POLLERR > 0 {interface::fd_set_insert(errors, fd)} + if events & POLLERR > 0 {errors.set(fd)} let mut mask: i16 = 0; @@ -1751,9 +1752,9 @@ impl Cage { // NOTE that the nfds argument is highest fd + 1 let selectret = Self::select_syscall(&self, fd + 1, 
Some(reads), Some(writes), Some(errors), Some(interface::RustDuration::ZERO)); if selectret > 0 { - mask += if !interface::fd_set_is_empty(reads, fd) {POLLIN} else {0}; - mask += if !interface::fd_set_is_empty(writes, fd) {POLLOUT} else {0}; - mask += if !interface::fd_set_is_empty(errors, fd) {POLLERR} else {0}; + mask += if !reads.is_empty() {POLLIN} else {0}; + mask += if !writes.is_empty() {POLLOUT} else {0}; + mask += if !errors.is_empty() {POLLERR} else {0}; return_code += 1; } else if selectret < 0 { return selectret; } structpoll.revents = mask; diff --git a/src/tests/networking_tests.rs b/src/tests/networking_tests.rs index 480a8000a..2f81358a3 100644 --- a/src/tests/networking_tests.rs +++ b/src/tests/networking_tests.rs @@ -694,19 +694,16 @@ pub mod net_tests { assert_eq!(cage.listen_syscall(serversockfd, 4), 0); // allocate spaces for fd_set bitmaps - let mut input_chunk: [u8; 128] = [0; 128]; - let mut output_chunk: [u8; 128] = [0; 128]; - - let inputs: *mut u8 = input_chunk.as_mut_ptr(); - let outputs: *mut u8 = output_chunk.as_mut_ptr(); + let inputs = &mut interface::FdSet::new(); + let outputs = &mut interface::FdSet::new(); - interface::fd_set_insert(inputs, serversockfd); - interface::fd_set_insert(inputs, filefd); - interface::fd_set_insert(outputs, filefd); + inputs.set(serversockfd); + inputs.set(filefd); + outputs.set(filefd); - assert_eq!(interface::fd_set_check_fd(inputs, serversockfd), true); - assert_eq!(interface::fd_set_check_fd(inputs, filefd), true); - assert_eq!(interface::fd_set_check_fd(outputs, filefd), true); + assert_eq!(inputs.is_set(serversockfd), true); + assert_eq!(inputs.is_set(filefd), true); + assert_eq!(outputs.is_set(filefd), true); assert_eq!(cage.fork_syscall(2), 0); assert_eq!(cage.fork_syscall(3), 0); @@ -773,20 +770,20 @@ pub mod net_tests { //Check for any activity in any of the Input sockets... 
//for sock in binputs { for sock in 0..FD_SET_MAX_FD { - if !interface::fd_set_check_fd(inputs, sock) {continue;} + if !inputs.is_set(sock) {continue;} //If the socket returned was listerner socket, then there's a new conn., so we accept it, and put the client socket in the list of Inputs. if sock == serversockfd { let mut sockgarbage = interface::GenSockaddr::V4(interface::SockaddrV4::default()); let sockfd = cage.accept_syscall(sock as i32, &mut sockgarbage); //really can only make sure that the fd is valid assert!(sockfd > 0); - interface::fd_set_insert(inputs, sockfd); - interface::fd_set_insert(outputs, sockfd); + inputs.set(sockfd); + outputs.set(sockfd) } else if sock == filefd { //Write to a file... assert_eq!(cage.write_syscall(sock as i32, str2cbuf("test"), 4), 4); assert_eq!(cage.lseek_syscall(sock as i32, 0, SEEK_SET), 0); - interface::fd_set_remove(inputs, sock); + inputs.clear(sock) } else { //If the socket is in established conn., then we recv the data. If there's no data, then close the client socket. let mut buf = sizecbuf(4); let mut recvresult :i32; @@ -798,28 +795,28 @@ pub mod net_tests { } if recvresult == 4 { if cbuf2str(&buf) == "test" { - interface::fd_set_insert(outputs, sock); + outputs.set(sock); continue; } } else { assert_eq!(recvresult, 0); } assert_eq!(cage.close_syscall(sock as i32), 0); - interface::fd_set_remove(inputs, sock); + inputs.clear(sock); } } //for sock in boutputs { for sock in 0..FD_SET_MAX_FD { - if !interface::fd_set_check_fd(outputs, sock) {continue;} + if !outputs.is_set(sock) {continue;} if sock == filefd { let mut buf = sizecbuf(4); assert_eq!(cage.read_syscall(sock as i32, buf.as_mut_ptr(), 4), 4); assert_eq!(cbuf2str(&buf), "test"); - interface::fd_set_remove(outputs, sock) + outputs.clear(sock); } else { //Data is sent out this socket, it's no longer ready for writing remove this socket from writefd's. 
assert_eq!(cage.send_syscall(sock as i32, str2cbuf("test"), 4, 0), 4); - interface::fd_set_remove(outputs, sock) + outputs.clear(sock); } } }