diff --git a/common/include/linux_socket.h b/common/include/linux_socket.h index a6aae1a103..60dcab878c 100644 --- a/common/include/linux_socket.h +++ b/common/include/linux_socket.h @@ -55,6 +55,8 @@ struct cmsghdr { #define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len)) #define SCM_RIGHTS 1 +#define SCM_CREDENTIALS 2 +#define SCM_SECURITY 3 #define AF_UNSPEC 0 #define AF_UNIX 1 @@ -96,9 +98,14 @@ struct cmsghdr { #define SO_RCVTIMEO 20 #define SO_SNDTIMEO 21 #define SO_ACCEPTCONN 30 +#define SO_MARK 36 +#define SO_TIMESTAMPING_OLD 37 #define SO_PROTOCOL 38 #define SO_DOMAIN 39 +#define SO_TXTIME 61 +#define SCM_TXTIME SO_TXTIME + /* TCP options. */ #define TCP_NODELAY 1 /* Turn off Nagle's algorithm */ #define TCP_CORK 3 /* Never send partially complete segments */ diff --git a/libos/include/libos_socket.h b/libos/include/libos_socket.h index d95ec06ce6..8b7d5789bf 100644 --- a/libos/include/libos_socket.h +++ b/libos/include/libos_socket.h @@ -80,6 +80,8 @@ struct libos_sock_ops { * \param handle A handle. * \param iov An array of buffers to write from. * \param iov_len The length of \p iov. + * \param msg_control An ancillary data buffer. + * \param msg_controllen The length of \p msg_control. * \param[out] out_size On success contains the number of bytes sent. * \param addr An address to send to. May be NULL. It's up to * the implementation to decide what to do with it (which might @@ -88,8 +90,9 @@ struct libos_sock_ops { * \param force_nonblocking If `true` this request should not block. Otherwise just use * whatever mode the handle is in. */ - int (*send)(struct libos_handle* handle, struct iovec* iov, size_t iov_len, size_t* out_size, - void* addr, size_t addrlen, bool force_nonblocking); + int (*send)(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* msg_control, + size_t msg_controllen, size_t* out_size, void* addr, size_t addrlen, + bool force_nonblocking); /*! * \brief Receive continuous data into an array of buffers. 
@@ -97,19 +100,23 @@ struct libos_sock_ops { * \param handle A handle. * \param iov An array of buffers to read to. * \param iov_len The length of \p iov. + * \param msg_control An ancillary data buffer to populate. + * \param[in,out] msg_controllen_ptr The length of \p msg_control. On success updated to the + * actual length of the received ancillary data. * \param[out] out_total_size On success contains the number of bytes received (STREAM) * or the datagram size (DGRAM), which might be bigger than * the total size of buffers in \p iov array. * \param[out] addr On success contains the address data was received from. May * be NULL. - * \param[in,out] addrlen The length of \p addr. On success updated to the actual + * \param[in,out] addrlen_ptr The length of \p addr. On success updated to the actual * length of the address. Bigger than original value indicates * that truncation has happened. * \param force_nonblocking If `true` this request should not block. Otherwise just use * whatever mode the handle is in. 
*/ - int (*recv)(struct libos_handle* handle, struct iovec* iov, size_t iov_len, - size_t* out_total_size, void* addr, size_t* addrlen, bool force_nonblocking); + int (*recv)(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* msg_control, + size_t* msg_controllen_ptr, size_t* out_total_size, void* addr, size_t* addrlen_ptr, + bool force_nonblocking); }; struct libos_handle* get_new_socket_handle(int family, int type, int protocol, @@ -118,7 +125,9 @@ struct libos_handle* get_new_socket_handle(int family, int type, int protocol, extern struct libos_sock_ops sock_unix_ops; extern struct libos_sock_ops sock_ip_ops; -ssize_t do_recvmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* addr, - size_t* addrlen, unsigned int* flags); -ssize_t do_sendmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* addr, - size_t addrlen, unsigned int flags); +ssize_t do_recvmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_len, + void* msg_control, size_t* msg_controllen_ptr, void* addr, size_t* addrlen_ptr, + unsigned int* flags); +ssize_t do_sendmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_len, + void* msg_control, size_t msg_controllen, void* addr, size_t addrlen, + unsigned int flags); diff --git a/libos/src/fs/socket/fs.c b/libos/src/fs/socket/fs.c index 5f2d06bfad..ed32dbdd0d 100644 --- a/libos/src/fs/socket/fs.c +++ b/libos/src/fs/socket/fs.c @@ -36,7 +36,8 @@ static ssize_t read(struct libos_handle* handle, void* buf, size_t size, file_of .iov_len = size, }; unsigned int flags = 0; - return do_recvmsg(handle, &iov, /*iov_len=*/1, /*addr=*/NULL, /*addrlen=*/NULL, &flags); + return do_recvmsg(handle, &iov, /*iov_len=*/1, /*msg_control=*/NULL, + /*msg_controllen_ptr=*/NULL, /*addr=*/NULL, /*addrlen_ptr=*/NULL, &flags); } static ssize_t write(struct libos_handle* handle, const void* buf, size_t size, file_off_t* pos) { @@ -45,20 +46,23 @@ static ssize_t write(struct libos_handle* handle, 
const void* buf, size_t size, .iov_base = (void*)buf, .iov_len = size, }; - return do_sendmsg(handle, &iov, /*iov_len=*/1, /*addr=*/NULL, /*addrlen=*/0, /*flags=*/0); + return do_sendmsg(handle, &iov, /*iov_len=*/1, /*msg_control=*/NULL, /*msg_controllen=*/0, + /*addr=*/NULL, /*addrlen=*/0, /*flags=*/0); } static ssize_t readv(struct libos_handle* handle, struct iovec* iov, size_t iov_len, file_off_t* pos) { __UNUSED(pos); unsigned int flags = 0; - return do_recvmsg(handle, iov, iov_len, /*addr=*/NULL, /*addrlen=*/NULL, &flags); + return do_recvmsg(handle, iov, iov_len, /*msg_control=*/NULL, /*msg_controllen_ptr=*/NULL, + /*addr=*/NULL, /*addrlen_ptr=*/NULL, &flags); } static ssize_t writev(struct libos_handle* handle, struct iovec* iov, size_t iov_len, file_off_t* pos) { __UNUSED(pos); - return do_sendmsg(handle, iov, iov_len, /*addr=*/NULL, /*addrlen=*/0, /*flags=*/0); + return do_sendmsg(handle, iov, iov_len, /*msg_control=*/NULL, /*msg_controllen=*/0, + /*addr=*/NULL, /*addrlen=*/0, /*flags=*/0); } static int hstat(struct libos_handle* handle, struct stat* stat) { diff --git a/libos/src/net/ip.c b/libos/src/net/ip.c index 5463f6e118..907219bc7e 100644 --- a/libos/src/net/ip.c +++ b/libos/src/net/ip.c @@ -11,14 +11,15 @@ #include "libos_fs.h" #include "libos_socket.h" +#include "linux_socket.h" #include "pal.h" #include "socket_utils.h" -static int verify_sockaddr(int expected_family, void* addr, size_t* addrlen) { +static int verify_sockaddr(int expected_family, void* addr, size_t* addrlen_ptr) { unsigned short family; switch (expected_family) { case AF_INET: - if (*addrlen < sizeof(struct sockaddr_in)) { + if (*addrlen_ptr < sizeof(struct sockaddr_in)) { return -EINVAL; } memcpy(&family, (char*)addr + offsetof(struct sockaddr_in, sin_family), sizeof(family)); @@ -27,10 +28,10 @@ static int verify_sockaddr(int expected_family, void* addr, size_t* addrlen) { } /* Cap the address at the maximal possible size - rest of the input buffer (if any) is * ignored. 
*/ - *addrlen = sizeof(struct sockaddr_in); + *addrlen_ptr = sizeof(struct sockaddr_in); break; case AF_INET6: - if (*addrlen < sizeof(struct sockaddr_in6)) { + if (*addrlen_ptr < sizeof(struct sockaddr_in6)) { return -EINVAL; } memcpy(&family, (char*)addr + offsetof(struct sockaddr_in6, sin6_family), @@ -40,7 +41,7 @@ static int verify_sockaddr(int expected_family, void* addr, size_t* addrlen) { } /* Cap the address at the maximal possible size - rest of the input buffer (if any) is * ignored. */ - *addrlen = sizeof(struct sockaddr_in6); + *addrlen_ptr = sizeof(struct sockaddr_in6); break; default: BUG(); @@ -662,13 +663,54 @@ static int getsockopt(struct libos_handle* handle, int level, int optname, void* } } -static int send(struct libos_handle* handle, struct iovec* iov, size_t iov_len, size_t* out_size, - void* addr, size_t addrlen, bool force_nonblocking) { +static int send(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* msg_control, + size_t msg_controllen, size_t* out_size, void* addr, size_t addrlen, + bool force_nonblocking) { assert(handle->type == TYPE_SOCK); struct libos_sock_handle* sock = &handle->info.sock; struct sockaddr_storage sock_addr; + struct cmsghdr* cmsg = (struct cmsghdr*)msg_control; + size_t rest_msg_controllen = msg_controllen; + while (cmsg && rest_msg_controllen >= sizeof(struct cmsghdr)) { + if (cmsg->cmsg_len < sizeof(struct cmsghdr) || + CMSG_ALIGN(cmsg->cmsg_len) > rest_msg_controllen) { + return -EINVAL; + } + + if (cmsg->cmsg_level != SOL_SOCKET) { + /* + * We currently don't support: + * - SOL_UDP: UDP_SEGMENT + * - SOL_IPV6: IPV6_PKTINFO + * - SOL_IP: IP_RETOPTS, IP_PKTINFO, IP_TTL, IP_TOS + * + * Note that there are no cmsgs for TCP (SOL_TCP) in Linux (as of v6.0). + */ + return -EINVAL; + } + + switch (cmsg->cmsg_type) { + /* We currently don't support below SOL_SOCKET types. 
*/ + case SO_MARK: + case SO_TIMESTAMPING_OLD: + case SCM_TXTIME: + return -EINVAL; + + /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX, simply ignored */ + case SCM_RIGHTS: + case SCM_CREDENTIALS: + break; + + default: + return -EINVAL; + } + + rest_msg_controllen -= CMSG_ALIGN(cmsg->cmsg_len); + cmsg = (struct cmsghdr*)((char*)cmsg + CMSG_ALIGN(cmsg->cmsg_len)); + } + switch (sock->type) { case SOCK_STREAM: /* TCP sockets ignore destination address - they must have been connected. */ @@ -709,15 +751,16 @@ static int send(struct libos_handle* handle, struct iovec* iov, size_t iov_len, return ret; } -static int recv(struct libos_handle* handle, struct iovec* iov, size_t iov_len, - size_t* out_total_size, void* addr, size_t* addrlen, bool force_nonblocking) { +static int recv(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* msg_control, + size_t* msg_controllen_ptr, size_t* out_total_size, void* addr, size_t* addrlen_ptr, + bool force_nonblocking) { assert(handle->type == TYPE_SOCK); switch (handle->info.sock.type) { case SOCK_STREAM: /* TCP - not interested in remote address (we know it already). */ addr = NULL; - addrlen = NULL; + addrlen_ptr = NULL; break; case SOCK_DGRAM: break; @@ -731,14 +774,29 @@ static int recv(struct libos_handle* handle, struct iovec* iov, size_t iov_len, if (ret < 0) { return pal_to_unix_errno(ret); } + + if (msg_control && msg_controllen_ptr) { + /* + * We currently don't support: + * - SOL_TCP: TCP_CM_INQ + * - SOL_SOCKET: SO_TIMESTAMPNS_NEW, SO_TIMESTAMPNS_OLD, SO_TIMESTAMP_NEW, SO_TIMESTAMP_OLD + * - SOL_IPV6: IPV6_PKTINFO + * - SOL_IP: IP_RETOPTS, IP_RECVOPTS, IP_PKTINFO, IP_TTL, IP_TOS, IP_RECVFRAGSIZE, + * IP_CHECKSUM, SCM_SECURITY, IP_ORIGDSTADDR, IP_RECVERR + * + * Note that SCM_RIGHTS and SCM_CREDENTIALS are not possible on TCP/UDP sockets. 
+ */ + *msg_controllen_ptr = 0; + } + if (addr) { struct sockaddr_storage linux_addr; size_t linux_addr_len = sizeof(linux_addr); pal_to_linux_sockaddr(&pal_ip_addr, &linux_addr, &linux_addr_len); /* If the user provided buffer is too small, the address is truncated, but we report - * the actual address size in `addrlen`. */ - memcpy(addr, &linux_addr, MIN(*addrlen, linux_addr_len)); - *addrlen = linux_addr_len; + * the actual address size in `addrlen_ptr`. */ + memcpy(addr, &linux_addr, MIN(*addrlen_ptr, linux_addr_len)); + *addrlen_ptr = linux_addr_len; } return 0; } diff --git a/libos/src/net/unix.c b/libos/src/net/unix.c index 21b1720dcd..25ec8c368d 100644 --- a/libos/src/net/unix.c +++ b/libos/src/net/unix.c @@ -19,25 +19,27 @@ #include "libos_fs.h" #include "libos_internal.h" #include "libos_socket.h" +#include "linux_socket.h" #include "pal.h" /*! * \brief Verify UNIX socket address and convert it to a unique socket name. * * \param addr The socket address to convert. - * \param[in,out] addrlen Pointer to the size of \p addr. Always updated to the actual size + * \param[in,out] addrlen_ptr Pointer to the size of \p addr. Always updated to the actual size * of the address (but it's never extended). * \param[out] sock_name Buffer for the output socket name. On success contains a null * terminated string. * \param sock_name_size Size of \p sock_name. */ -static int unaddr_to_sockname(void* addr, size_t* addrlen, char* sock_name, size_t sock_name_size) { - if (*addrlen > sizeof(struct sockaddr_un)) { +static int unaddr_to_sockname(void* addr, size_t* addrlen_ptr, char* sock_name, + size_t sock_name_size) { + if (*addrlen_ptr > sizeof(struct sockaddr_un)) { /* Cap the address at the maximal possible size - rest of the input buffer (if any) is * ignored. 
*/ - *addrlen = sizeof(struct sockaddr_un); + *addrlen_ptr = sizeof(struct sockaddr_un); } - if (*addrlen < offsetof(struct sockaddr_un, sun_path) + 1) { + if (*addrlen_ptr < offsetof(struct sockaddr_un, sun_path) + 1) { return -EINVAL; } static_assert(offsetof(struct sockaddr_un, sun_family) < offsetof(struct sockaddr_un, sun_path), @@ -49,7 +51,7 @@ static int unaddr_to_sockname(void* addr, size_t* addrlen, char* sock_name, size } const char* path = (char*)addr + offsetof(struct sockaddr_un, sun_path); - size_t pathlen = *addrlen - offsetof(struct sockaddr_un, sun_path); + size_t pathlen = *addrlen_ptr - offsetof(struct sockaddr_un, sun_path); assert(pathlen >= 1); if (path[0]) { /* Named UNIX socket. */ @@ -75,16 +77,16 @@ static int unaddr_to_sockname(void* addr, size_t* addrlen, char* sock_name, size return 0; } -static void fixup_sockaddr_un_path(struct sockaddr_storage* ss_addr, size_t* addrlen) { +static void fixup_sockaddr_un_path(struct sockaddr_storage* ss_addr, size_t* addrlen_ptr) { /* We know the addr is valid, but it might not contain the ending nullbyte or contain some * unnecessary garbage after it. */ - assert(*addrlen <= sizeof(struct sockaddr_un)); - assert(offsetof(struct sockaddr_un, sun_path) < *addrlen); + assert(*addrlen_ptr <= sizeof(struct sockaddr_un)); + assert(offsetof(struct sockaddr_un, sun_path) < *addrlen_ptr); assert(sizeof(struct sockaddr_un) < sizeof(*ss_addr)); - assert(*addrlen < sizeof(*ss_addr)); + assert(*addrlen_ptr < sizeof(*ss_addr)); char* path = (char*)ss_addr + offsetof(struct sockaddr_un, sun_path); - size_t pathlen = *addrlen - offsetof(struct sockaddr_un, sun_path); + size_t pathlen = *addrlen_ptr - offsetof(struct sockaddr_un, sun_path); assert(pathlen >= 1); if (!path[0]) { /* Abstract UNIX socket - nothing to do. 
*/ @@ -97,8 +99,8 @@ static void fixup_sockaddr_un_path(struct sockaddr_storage* ss_addr, size_t* add assert(sizeof(*ss_addr) - offsetof(struct sockaddr_un, sun_path) - pathlen > 0); memset(path + pathlen, 0, sizeof(*ss_addr) - offsetof(struct sockaddr_un, sun_path) - pathlen); - *addrlen = offsetof(struct sockaddr_un, sun_path) + pathlen + 1; - assert(*addrlen <= sizeof(*ss_addr)); + *addrlen_ptr = offsetof(struct sockaddr_un, sun_path) + pathlen + 1; + assert(*addrlen_ptr <= sizeof(*ss_addr)); } static int create(struct libos_handle* handle) { @@ -399,8 +401,9 @@ static int maybe_force_nonblocking_wrapper(bool force_nonblocking, struct libos_ return ret; } -static int send(struct libos_handle* handle, struct iovec* iov, size_t iov_len, size_t* out_size, - void* addr, size_t addrlen, bool force_nonblocking) { +static int send(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* msg_control, + size_t msg_controllen, size_t* out_size, void* addr, size_t addrlen, + bool force_nonblocking) { __UNUSED(addr); __UNUSED(addrlen); @@ -409,6 +412,32 @@ static int send(struct libos_handle* handle, struct iovec* iov, size_t iov_len, BUG(); } + struct cmsghdr* cmsg = (struct cmsghdr*)msg_control; + size_t rest_msg_controllen = msg_controllen; + while (cmsg && rest_msg_controllen >= sizeof(struct cmsghdr)) { + if (cmsg->cmsg_len < sizeof(struct cmsghdr) || + CMSG_ALIGN(cmsg->cmsg_len) > rest_msg_controllen) { + return -EINVAL; + } + + if (cmsg->cmsg_level != SOL_SOCKET) { + /* Linux ignores non-SOL-SOCKET cmsgs instead of erroring out, let's do the same */ + continue; + } + + switch (cmsg->cmsg_type) { + /* TODO: implement SCM_RIGHTS and SCM_CREDENTIALS */ + case SCM_RIGHTS: + case SCM_CREDENTIALS: + return -ENOSYS; + default: + return -EINVAL; + } + + rest_msg_controllen -= CMSG_ALIGN(cmsg->cmsg_len); + cmsg = (struct cmsghdr*)((char*)cmsg + CMSG_ALIGN(cmsg->cmsg_len)); + } + PAL_HANDLE pal_handle = __atomic_load_n(&handle->info.sock.pal_handle, 
__ATOMIC_ACQUIRE); if (!pal_handle) { return -ENOTCONN; @@ -449,10 +478,11 @@ static int send(struct libos_handle* handle, struct iovec* iov, size_t iov_len, return 0; } -static int recv(struct libos_handle* handle, struct iovec* iov, size_t iov_len, size_t* out_size, - void* addr, size_t* addrlen, bool force_nonblocking) { +static int recv(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* msg_control, + size_t* msg_controllen_ptr, size_t* out_size, void* addr, size_t* addrlen_ptr, + bool force_nonblocking) { __UNUSED(addr); - __UNUSED(addrlen); + __UNUSED(addrlen_ptr); if (handle->info.sock.type == SOCK_DGRAM) { /* We do not support datagram UNIX sockets. */ @@ -500,6 +530,12 @@ static int recv(struct libos_handle* handle, struct iovec* iov, size_t iov_len, *out_size = size; } free(backing_buf); + + if (msg_control && msg_controllen_ptr) { + /* TODO: implement SCM_RIGHTS and SCM_CREDENTIALS (if sent by app) */ + *msg_controllen_ptr = 0; + } + return ret; } diff --git a/libos/src/sys/libos_socket.c b/libos/src/sys/libos_socket.c index eb761c435d..72d5ed14d4 100644 --- a/libos/src/sys/libos_socket.c +++ b/libos/src/sys/libos_socket.c @@ -582,8 +582,9 @@ static int check_msghdr(struct msghdr* user_msg, bool is_recv) { } } if (user_msg->msg_control && user_msg->msg_controllen) { - log_warning("\"struct msghdr\" ancillary data is not supported"); - return -ENOSYS; + if (!check_access_func(user_msg->msg_control, user_msg->msg_controllen)) { + return -EFAULT; + } } if (user_msg->msg_name) { if (user_msg->msg_namelen < 0) { @@ -608,8 +609,9 @@ static int check_msghdr(struct msghdr* user_msg, bool is_recv) { /* We return the size directly (contrary to the usual out argument) for simplicity - this function * is called directly from syscall handlers, which return values in such a way. 
*/ -ssize_t do_sendmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* addr, - size_t addrlen, unsigned int flags) { +ssize_t do_sendmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_len, + void* msg_control, size_t msg_controllen, void* addr, size_t addrlen, + unsigned int flags) { ssize_t ret = 0; if (handle->type != TYPE_SOCK) { return -ENOTSOCK; @@ -654,7 +656,8 @@ ssize_t do_sendmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_le } size_t size = 0; - ret = sock->ops->send(handle, iov, iov_len, &size, addr, addrlen, force_nonblocking); + ret = sock->ops->send(handle, iov, iov_len, msg_control, msg_controllen, &size, addr, addrlen, + force_nonblocking); maybe_epoll_et_trigger(handle, ret, /*in=*/false, !ret ? size < total_size : false); if (!ret) { ret = size; @@ -707,7 +710,8 @@ long libos_syscall_sendto(int fd, void* buf, size_t len, unsigned int flags, voi .iov_base = buf, .iov_len = len, }; - ssize_t ret = do_sendmsg(handle, &iov, 1, addr, addr ? addrlen : 0, flags); + ssize_t ret = do_sendmsg(handle, &iov, 1, /*msg_control=*/NULL, /*msg_controllen=*/0, addr, + addr ? addrlen : 0, flags); put_handle(handle); return ret; } @@ -724,7 +728,8 @@ long libos_syscall_sendmsg(int fd, struct msghdr* msg, unsigned int flags) { } size_t addrlen = msg->msg_name ? msg->msg_namelen : 0; - ret = do_sendmsg(handle, msg->msg_iov, msg->msg_iovlen, msg->msg_name, addrlen, flags); + ret = do_sendmsg(handle, msg->msg_iov, msg->msg_iovlen, msg->msg_control, msg->msg_controllen, + msg->msg_name, addrlen, flags); put_handle(handle); return ret; } @@ -749,7 +754,8 @@ long libos_syscall_sendmmsg(int fd, struct mmsghdr* msg, unsigned int vlen, unsi for (size_t i = 0; i < vlen; i++) { struct msghdr* hdr = &msg[i].msg_hdr; size_t addrlen = hdr->msg_name ? 
hdr->msg_namelen : 0; - ret = do_sendmsg(handle, hdr->msg_iov, hdr->msg_iovlen, hdr->msg_name, addrlen, flags); + ret = do_sendmsg(handle, hdr->msg_iov, hdr->msg_iovlen, hdr->msg_control, + hdr->msg_controllen, hdr->msg_name, addrlen, flags); if (ret < 0) { if (i == 0) { /* Return error directly. */ @@ -776,8 +782,9 @@ long libos_syscall_sendmmsg(int fd, struct mmsghdr* msg, unsigned int vlen, unsi /* We return the size directly (contrary to the usual out argument) for simplicity - this function * is called directly from syscall handlers, which return values in such a way. */ -ssize_t do_recvmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_len, void* addr, - size_t* addrlen, unsigned int* flags) { +ssize_t do_recvmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_len, + void* msg_control, size_t* msg_controllen_ptr, void* addr, size_t* addrlen_ptr, + unsigned int* flags) { ssize_t ret = 0; if (handle->type != TYPE_SOCK) { return -ENOTSOCK; @@ -855,8 +862,9 @@ ssize_t do_recvmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_le force_nonblocking = true; } - ret = sock->ops->recv(handle, &tmp_iov, 1, &tmp_iov.iov_len, /*addr=*/NULL, - /*addrlen=*/NULL, force_nonblocking); + ret = sock->ops->recv(handle, &tmp_iov, 1, /*msg_control=*/NULL, + /*msg_controllen_ptr=*/NULL, &tmp_iov.iov_len, /*addr=*/NULL, + /*addrlen_ptr=*/NULL, force_nonblocking); if (ret == -EAGAIN && sock->peek.data_size) { /* We will just return what we have already. */ ret = 0; @@ -899,7 +907,8 @@ ssize_t do_recvmsg(struct libos_handle* handle, struct iovec* iov, size_t iov_le assert(!(*flags & MSG_PEEK)); size_t size = 0; - ret = sock->ops->recv(handle, iov, iov_len, &size, addr, addrlen, force_nonblocking); + ret = sock->ops->recv(handle, iov, iov_len, msg_control, msg_controllen_ptr, &size, addr, + addrlen_ptr, force_nonblocking); maybe_epoll_et_trigger(handle, ret, /*in=*/true, !ret ? size < total_size : false); if (!ret) { ret = *flags & MSG_TRUNC ? 
size : MIN(size, total_size); @@ -949,7 +958,8 @@ long libos_syscall_recvfrom(int fd, void* buf, size_t len, unsigned int flags, v .iov_base = buf, .iov_len = len, }; - ssize_t ret = do_recvmsg(handle, &iov, 1, addr, &addrlen, &flags); + ssize_t ret = do_recvmsg(handle, &iov, 1, /*msg_control=*/NULL, /*msg_controllen_ptr=*/NULL, + addr, &addrlen, &flags); if (ret >= 0 && addr) { *_addrlen = addrlen; } @@ -969,7 +979,8 @@ long libos_syscall_recvmsg(int fd, struct msghdr* msg, unsigned int flags) { } size_t addrlen = msg->msg_name ? msg->msg_namelen : 0; - ret = do_recvmsg(handle, msg->msg_iov, msg->msg_iovlen, msg->msg_name, &addrlen, &flags); + ret = do_recvmsg(handle, msg->msg_iov, msg->msg_iovlen, msg->msg_control, &msg->msg_controllen, + msg->msg_name, &addrlen, &flags); if (ret >= 0) { if (msg->msg_name) { msg->msg_namelen = addrlen; @@ -1014,8 +1025,8 @@ long libos_syscall_recvmmsg(int fd, struct mmsghdr* msg, unsigned int vlen, unsi struct msghdr* hdr = &msg[i].msg_hdr; size_t addrlen = hdr->msg_name ? hdr->msg_namelen : 0; unsigned int this_flags = flags; - ret = do_recvmsg(handle, hdr->msg_iov, hdr->msg_iovlen, hdr->msg_name, &addrlen, - &this_flags); + ret = do_recvmsg(handle, hdr->msg_iov, hdr->msg_iovlen, hdr->msg_control, + &hdr->msg_controllen, hdr->msg_name, &addrlen, &this_flags); if (ret < 0) { if (i == 0) { /* Return error directly. */ diff --git a/libos/test/ltp/ltp.cfg b/libos/test/ltp/ltp.cfg index 7efd278e73..04c0b284e5 100644 --- a/libos/test/ltp/ltp.cfg +++ b/libos/test/ltp/ltp.cfg @@ -1661,22 +1661,11 @@ must-pass = 4 5 -# subtest 3: Linux ignores invalid address in recvmsg for stream sockets. -# subtest 4: Requires MSG_ERRQUEUE support (LTP outputs: "skip MSG_ERRQUEUE test, it's supported -# from 3.17"). -# subtest 8: Ancillary data not supported. -# subtest 9: Requires MSG_OOB support. -# subtest 10: Requires MSG_ERRQUEUE support (LTP outputs: "skip MSG_ERRQUEUE test, it's supported -# from 3.17"). 
-# subtest 11: Ancillary data not supported. -# subtest 12: Ancillary data not supported. +# subtest 8 requires SCM_RIGHTS support (to send an FD from sender to receiver) and doesn't check +# for errors (Gramine fails with -ENOSYS), leading to a hang on the receiver side because it tries +# to receive something that was never sent. [recvmsg01] -must-pass = - 1 - 2 - 5 - 6 - 7 +skip = yes # MSG_PEEK with UDP sockets not supported currently. [recvmsg02] diff --git a/libos/test/regression/meson.build b/libos/test/regression/meson.build index 9a56c80574..18107ccbc8 100644 --- a/libos/test/regression/meson.build +++ b/libos/test/regression/meson.build @@ -124,6 +124,7 @@ tests = { 'syscall': {}, 'syscall_restart': {}, 'sysfs_common': {}, + 'tcp_ancillary': {}, 'tcp_ipv6_v6only': {}, 'tcp_msg_peek': {}, 'udp': {}, diff --git a/libos/test/regression/tcp_ancillary.c b/libos/test/regression/tcp_ancillary.c new file mode 100644 index 0000000000..3452e1d460 --- /dev/null +++ b/libos/test/regression/tcp_ancillary.c @@ -0,0 +1,186 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef SCM_TXTIME +#define SCM_TXTIME 61 +#endif + +#include "common.h" + +#define SRV_IP "127.0.0.1" +#define PORT 11110 + +#define MSG_SPACE (CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(struct ucred))) + +static const char g_buffer[] = "Hello from server!"; + +static void server(int pipefd) { + int s = CHECK(socket(AF_INET, SOCK_STREAM, 0)); + + int enable = 1; + CHECK(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable))); + + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_port = htons(PORT), + .sin_addr.s_addr = htonl(INADDR_ANY), + }; + + CHECK(bind(s, (void*)&sa, sizeof(sa))); + CHECK(listen(s, 5)); + + char c = 0; + ssize_t x = CHECK(write(pipefd, &c, sizeof(c))); + if (x != sizeof(c)) { + CHECK(-1); + } + CHECK(close(pipefd)); + + int client = CHECK(accept(s, NULL, NULL)); + + CHECK(close(s)); + + struct 
iovec iovec = { + .iov_base = (char*)g_buffer, + .iov_len = sizeof(g_buffer), + }; + + char control[MSG_SPACE] = {0}; + struct msghdr msg = { + .msg_iov = &iovec, + .msg_iovlen = 1, + .msg_control = control, + .msg_controllen = sizeof(control), + }; + + /* below two ancillary data are dummies -- they should be ignored on TCP/IP */ + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + struct ucred* cred = (struct ucred*)CMSG_DATA(cmsg); + cred->pid = getpid(); + cred->uid = getuid(); + cred->gid = getgid(); + + cmsg = CMSG_NXTHDR(&msg, cmsg); + if (!cmsg) { + /* make GCC happy (otherwise "potential null pointer dereference") */ + errx(1, "no space for second cmsg"); + } + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + int* fd = (int*)CMSG_DATA(cmsg); + *fd = STDOUT_FILENO; + + x = CHECK(sendmsg(client, &msg, /*flags=*/0)); + if (!x) { + /* technically impossible, but let's fail loudly if we ever hit this */ + errx(1, "sendmsg returned zero"); + } + + /* set some dummy incorrect SCM_TXTIME in second ancillary data -- must result in EINVAL */ + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); /* wrong length */ + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_TXTIME; + x = sendmsg(client, &msg, /*flags=*/0); + if (x != -1 || errno != EINVAL) { + errx(1, "sendmsg with invalid SCM_TXTIME didn't fail with -EINVAL"); + } + + CHECK(close(client)); +} + +static void client(int pipefd) { + char c = 0; + ssize_t x = CHECK(read(pipefd, &c, sizeof(c))); + if (x != sizeof(c)) { + CHECK(-1); + } + CHECK(close(pipefd)); + + int s = CHECK(socket(AF_INET, SOCK_STREAM, 0)); + + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_port = htons(PORT), + .sin_addr = { + /* TODO: remove this once Ubuntu 18.04 is deprecated. 
*/ + .s_addr = 0, + }, + }; + if (inet_aton(SRV_IP, &sa.sin_addr) != 1) { + CHECK(-1); + } + + CHECK(connect(s, (void*)&sa, sizeof(sa))); + + while (1) { + /* Wait for the full data to arive. */ + int v = 0; + CHECK(ioctl(s, FIONREAD, &v)); + if ((unsigned int)v >= sizeof(g_buffer)) { + break; + } + } + + char buf[sizeof(g_buffer) + 1] = { 0 }; + struct iovec iovec = { + .iov_base = buf, + .iov_len = sizeof(buf), + }; + + char control[MSG_SPACE] = {0}; + struct msghdr msg = { + .msg_iov = &iovec, + .msg_iovlen = 1, + .msg_control = control, + .msg_controllen = sizeof(control), + }; + + ssize_t count = recvmsg(s, &msg, /*flags=*/0); + if (count != sizeof(g_buffer)) { + errx(1, "recv returned less than available: %zd", count); + } + if (memcmp(buf, g_buffer, sizeof(g_buffer))) { + errx(1, "wrong data received: %s", buf); + } + if (msg.msg_controllen) { + errx(1, "unexpected ancillary data received (length = %zd)", msg.msg_controllen); + } + + CHECK(close(s)); +} + +int main(int argc, char** argv) { + int pipefds[2]; + CHECK(pipe(pipefds)); + + pid_t p = CHECK(fork()); + if (p == 0) { + CHECK(close(pipefds[1])); + client(pipefds[0]); + return 0; + } + + CHECK(close(pipefds[0])); + server(pipefds[1]); + + int status = 0; + CHECK(wait(&status)); + if (!WIFEXITED(status) || WEXITSTATUS(status)) { + errx(1, "child wait status: %#x", status); + } + + puts("TEST OK"); + return 0; +} diff --git a/libos/test/regression/test_libos.py b/libos/test/regression/test_libos.py index d54de2ba23..0d2fa2a97d 100644 --- a/libos/test/regression/test_libos.py +++ b/libos/test/regression/test_libos.py @@ -1336,6 +1336,10 @@ def test_300_socket_tcp_msg_peek(self): stdout, _ = self.run_binary(['tcp_msg_peek']) self.assertIn('TEST OK', stdout) + def test_301_socket_tcp_ancillary(self): + stdout, _ = self.run_binary(['tcp_ancillary']) + self.assertIn('TEST OK', stdout) + def test_310_socket_tcp_ipv6_v6only(self): stdout, _ = self.run_binary(['tcp_ipv6_v6only'], timeout=50) self.assertIn('test 
completed successfully', stdout) diff --git a/libos/test/regression/tests.toml b/libos/test/regression/tests.toml index bad3961ff6..f36a1c4a2f 100644 --- a/libos/test/regression/tests.toml +++ b/libos/test/regression/tests.toml @@ -109,6 +109,7 @@ manifests = [ "syscall", "syscall_restart", "sysfs_common", + "tcp_ancillary", "tcp_ipv6_v6only", "tcp_msg_peek", "toml_parsing", diff --git a/libos/test/regression/tests_musl.toml b/libos/test/regression/tests_musl.toml index f65a13ef9d..f872d14950 100644 --- a/libos/test/regression/tests_musl.toml +++ b/libos/test/regression/tests_musl.toml @@ -110,6 +110,7 @@ manifests = [ "syscall", "syscall_restart", "sysfs_common", + "tcp_ancillary", "tcp_ipv6_v6only", "tcp_msg_peek", "toml_parsing",