diff --git a/src/tools/perf/perftest.c b/src/tools/perf/perftest.c index 66e370d9048..72316b530ce 100644 --- a/src/tools/perf/perftest.c +++ b/src/tools/perf/perftest.c @@ -290,37 +290,48 @@ static ucx_perf_rte_t sock_rte = { static ucs_status_t setup_sock_rte(struct perftest_context *ctx) { - struct sockaddr_in inaddr; - struct hostent *he; + struct addrinfo hints, *res, *t; ucs_status_t status; int optval = 1; int sockfd, connfd; - int ret; - - sockfd = socket(AF_INET, SOCK_STREAM, 0); - if (sockfd < 0) { - ucs_error("socket() failed: %m"); - status = UCS_ERR_IO_ERROR; + int ret, n; + char service[8]; + + sprintf(service, "%d", ctx->port); + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_flags = (ctx->server_addr == NULL) ? AI_PASSIVE : 0; + hints.ai_family = ctx->ai_family; + hints.ai_socktype = SOCK_STREAM; + + n = getaddrinfo(ctx->server_addr, service, &hints, &res); + if (n < 0) { + ucs_error("getaddrinfo error:: [%s]", gai_strerror(n)); goto err; } if (ctx->server_addr == NULL) { - optval = 1; - status = ucs_socket_setopt(sockfd, SOL_SOCKET, SO_REUSEADDR, - &optval, sizeof(optval)); - if (status != UCS_OK) { - goto err_close_sockfd; + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + status = ucs_socket_setopt(sockfd, SOL_SOCKET, SO_REUSEADDR, + &optval, sizeof(optval)); + if (status != UCS_OK) { + goto err_close_sockfd; + } + + if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) { + break; + } + + close(sockfd); + sockfd = -1; + } } - inaddr.sin_family = AF_INET; - inaddr.sin_port = htons(ctx->port); - inaddr.sin_addr.s_addr = INADDR_ANY; - memset(inaddr.sin_zero, 0, sizeof(inaddr.sin_zero)); - ret = bind(sockfd, (struct sockaddr*)&inaddr, sizeof(inaddr)); - if (ret < 0) { - ucs_error("bind() failed: %m"); - status = UCS_ERR_INVALID_ADDR; - goto err_close_sockfd; + if (sockfd < 0) { + ucs_error("socket() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err; } ret = listen(sockfd, 10); @@ -375,25 +386,21 @@ static ucs_status_t setup_sock_rte(struct perftest_context *ctx) ctx->sock_rte_group.connfd = connfd; ctx->sock_rte_group.is_server = 1; } else { - he = gethostbyname(ctx->server_addr); - if (he == NULL || he->h_addr_list == NULL) { - ucs_error("host %s not found: %s", ctx->server_addr, - hstrerror(h_errno)); - status = UCS_ERR_INVALID_ADDR; - goto err_close_sockfd; + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) { + break; + } + close(sockfd); + sockfd = -1; + } } - inaddr.sin_family = he->h_addrtype; - inaddr.sin_port = htons(ctx->port); - ucs_assert(he->h_length == sizeof(inaddr.sin_addr)); - memcpy(&inaddr.sin_addr, he->h_addr_list[0], he->h_length); - memset(inaddr.sin_zero, 0, sizeof(inaddr.sin_zero)); - - ret = connect(sockfd, (struct sockaddr*)&inaddr, sizeof(inaddr)); - if (ret < 0) { - ucs_error("connect() failed: %m"); - status = UCS_ERR_UNREACHABLE; - goto err_close_sockfd; + if (sockfd < 0) { + ucs_error("socket() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err; } safe_send(sockfd, &ctx->params, sizeof(ctx->params), NULL, NULL); @@ -417,6 +424,8 @@ static ucs_status_t setup_sock_rte(struct perftest_context *ctx) ctx->params.super.rte_group = &ctx->sock_rte_group; ctx->params.super.rte = &sock_rte; ctx->params.super.report_arg = ctx; + + freeaddrinfo(res); return UCS_OK; err_close_connfd: diff --git a/src/tools/perf/perftest.h b/src/tools/perf/perftest.h index 0c4d6ac9793..6d601457700 100644 --- a/src/tools/perf/perftest.h +++ b/src/tools/perf/perftest.h @@ -56,6 +56,7 @@ struct perftest_context { perftest_params_t params; const char *server_addr; int port; + uint8_t ai_family; int mpi; unsigned num_cpus; unsigned cpus[MAX_CPUS]; diff --git a/src/tools/perf/perftest_params.c b/src/tools/perf/perftest_params.c index 4ef0d3f2fe3..41078ee4b1b 100644 --- a/src/tools/perf/perftest_params.c +++ b/src/tools/perf/perftest_params.c @@ -81,6 +81,7 @@ static void usage(const struct perftest_context *ctx, const char *program) printf(" -R percentile rank of the percentile data in latency tests (%.1f)\n", ctx->params.super.percentile_rank); printf(" -p TCP port to use for data exchange (%d)\n", ctx->port); + printf(" -I Use IPv6 address for TCP connection in data exchange\n"); #ifdef HAVE_MPI printf(" -P <0|1> disable/enable MPI mode (%d)\n", ctx->mpi); #endif @@ -523,15 +524,19 @@ ucs_status_t parse_opts(struct perftest_context *ctx, int mpi_initialized, ctx->server_addr = NULL; ctx->num_batch_files = 0; ctx->port = 13337; + ctx->ai_family = AF_INET; ctx->flags = 0; ctx->mpi = mpi_initialized; optind = 1; - while ((c = getopt (argc, argv, "p:b:Nfvc:P:h" TEST_PARAMS_ARGS)) != -1) { + while ((c = getopt (argc, argv, "p:b:INfvc:P:h" TEST_PARAMS_ARGS)) != -1) { switch (c) { case 'p': ctx->port = atoi(optarg); break; + case 'I': + ctx->ai_family = AF_INET6; + break; case 'b': if (ctx->num_batch_files < MAX_BATCH_FILES) { ctx->batch_files[ctx->num_batch_files++] = optarg; diff --git a/src/ucs/sys/sock.c b/src/ucs/sys/sock.c index a52d4623429..4b252871b4a 100644 --- a/src/ucs/sys/sock.c +++ b/src/ucs/sys/sock.c @@ -100,12 +100,59 @@ ucs_status_t ucs_netif_ioctl(const char *if_name, unsigned long request, return status; } +int ucs_netif_get_addr(const char *if_name, uint8_t sa_family, + struct sockaddr *saddr, int is_netmask) +{ + ucs_status_t status = UCS_ERR_NO_DEVICE; + struct ifaddrs *ifa; + struct ifaddrs *ifaddrs; + const struct sockaddr_in6 *saddr6; + + if(getifaddrs(&ifaddrs)) { + ucs_warn("getifaddrs error: %m"); + return UCS_ERR_IO_ERROR; + } + + for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) { + if(if_name && (0 != strcmp(if_name, ifa->ifa_name))) { + continue; + } + + if(ifa->ifa_addr == NULL) { + return UCS_ERR_IO_ERROR; + } + + if (ifa->ifa_addr->sa_family == AF_INET6) { + saddr6 = (const struct sockaddr_in6 *)ifa->ifa_addr; + if (IN6_IS_ADDR_LOOPBACK(&saddr6->sin6_addr) || + IN6_IS_ADDR_LINKLOCAL(&saddr6->sin6_addr)) { + continue; + } + } + + if (sa_family == AF_UNSPEC || ifa->ifa_addr->sa_family == sa_family) { + if (saddr) { + if (!is_netmask) { + memcpy(saddr, ifa->ifa_addr, sizeof(struct sockaddr_storage)); + } else { + memcpy(saddr, ifa->ifa_netmask, sizeof(struct sockaddr_storage)); + } + } + + status = UCS_OK; + } + } + + freeifaddrs(ifaddrs); + return status; +} + int ucs_netif_is_active(const char *if_name) { ucs_status_t status; struct ifreq ifr; - status = ucs_netif_ioctl(if_name, SIOCGIFADDR, &ifr); + status = ucs_netif_get_addr(if_name, AF_UNSPEC, NULL, 0); if (status != UCS_OK) { return 0; } @@ -920,38 +967,18 @@ ucs_status_t ucs_sockaddr_get_ifname(int fd, char *ifname_str, size_t max_strlen return status; } -static ucs_status_t ucs_sockaddr_ifreq(const char *if_name, - unsigned long request, - struct sockaddr_in *dest) -{ - ucs_status_t status; - struct ifreq ifr; - - status = ucs_netif_ioctl(if_name, request, &ifr); - if (status != UCS_OK) { - return status; - } - - if (ifr.ifr_addr.sa_family != AF_INET) { - ucs_error("%s address is not INET", if_name); - return UCS_ERR_INVALID_ADDR; - } - - memcpy(dest, &ifr.ifr_addr, sizeof(*dest)); - - return UCS_OK; -} - ucs_status_t -ucs_sockaddr_get_ifaddr(const char *if_name, struct sockaddr_in *addr) +ucs_sockaddr_get_ifaddr(const char *if_name, sa_family_t af, + struct sockaddr *addr) { - return ucs_sockaddr_ifreq(if_name, SIOCGIFADDR, addr); + return ucs_netif_get_addr(if_name, af, addr, 0); } ucs_status_t -ucs_sockaddr_get_ifmask(const char *if_name, struct sockaddr_in *mask) +ucs_sockaddr_get_ifmask(const char *if_name, sa_family_t af, + struct sockaddr *mask) { - return ucs_sockaddr_ifreq(if_name, SIOCGIFNETMASK, mask); + return ucs_netif_get_addr(if_name, af, mask, 1); } const char *ucs_sockaddr_address_family_str(sa_family_t af) diff --git a/src/ucs/sys/sock.h b/src/ucs/sys/sock.h index b4005a3ab3c..81f7e3d873c 100644 --- a/src/ucs/sys/sock.h +++ b/src/ucs/sys/sock.h @@ -540,19 +540,23 @@ ucs_sockaddr_get_ifname(int fd, char *ifname_str, size_t max_strlen); * Copy the IP address associated with the given network interface. * * @param [in] if_name Interface name. + * @param [in] af Address family to check. * @param [out] addr The IP address of the given interface. */ ucs_status_t -ucs_sockaddr_get_ifaddr(const char *if_name, struct sockaddr_in *addr); +ucs_sockaddr_get_ifaddr(const char *if_name, sa_family_t af, + struct sockaddr *addr); /** * Copy the IP subnet mask associated with the given network interface. * * @param [in] if_name Interface name. + * @param [in] af Address family to check. * @param [out] addr The IP address of the given interface. */ ucs_status_t -ucs_sockaddr_get_ifmask(const char *if_name, struct sockaddr_in *mask); +ucs_sockaddr_get_ifmask(const char *if_name, sa_family_t af, + struct sockaddr *mask); /** diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index b96ade8d370..f1eb140aebe 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -1199,7 +1199,7 @@ uct_ib_iface_init_roce_mask_info(uct_ib_iface_t *iface, size_t md_config_index) uct_ib_device_t *dev = uct_ib_iface_device(iface); uint8_t port_num = iface->config.port_num; - struct sockaddr_in mask; + struct sockaddr_storage mask; char ndev_name[IFNAMSIZ]; ucs_status_t status; size_t addr_size; @@ -1214,7 +1214,7 @@ uct_ib_iface_init_roce_mask_info(uct_ib_iface_t *iface, size_t md_config_index) goto mask_info_failed; } - status = ucs_sockaddr_get_ifmask(ndev_name, &mask); + status = ucs_sockaddr_get_ifmask(ndev_name, AF_UNSPEC, (struct sockaddr*)&mask); if (status != UCS_OK) { goto mask_info_failed; } diff --git a/src/uct/tcp/tcp.h b/src/uct/tcp/tcp.h index f38d80c1f2d..0541f429926 100644 --- a/src/uct/tcp/tcp.h +++ b/src/uct/tcp/tcp.h @@ -195,7 +195,7 @@ enum { typedef struct uct_tcp_cm_conn_req_pkt { uct_tcp_cm_conn_event_t event; /* Connection event ID */ uint8_t flags; /* Packet flags */ - struct sockaddr_in iface_addr; /* Socket address of UCT local iface */ + struct sockaddr_storage iface_addr; /* Socket address of UCT local iface */ uct_tcp_ep_cm_id_t cm_id; /* EP connection mananger ID */ } UCS_S_PACKED uct_tcp_cm_conn_req_pkt_t; @@ -341,7 +341,7 @@ struct uct_tcp_ep { uct_tcp_ep_cm_id_t cm_id; /* EP connection mananger ID */ uct_tcp_ep_ctx_t tx; /* TX resources */ uct_tcp_ep_ctx_t rx; /* RX resources */ - struct sockaddr_in peer_addr; /* Remote iface addr */ + struct sockaddr_storage peer_addr; /* Remote iface addr */ ucs_queue_head_t pending_q; /* Pending operations */ ucs_queue_head_t put_comp_q; /* Flush completions waiting for * outstanding PUTs acknowledgment */ @@ -388,8 +388,8 @@ typedef struct uct_tcp_iface { size_t hdr_offset; /* Offset in TX buffer to empty space that * can be used for AM Zcopy header */ } zcopy; - struct sockaddr_in ifaddr; /* Network address */ - struct sockaddr_in netmask; /* Network address mask */ + struct sockaddr_storage ifaddr; /* Network address */ + struct sockaddr_storage netmask; /* Network address mask */ int prefer_default; /* Prefer default gateway */ int put_enable; /* Enable PUT Zcopy operation support */ int conn_nb; /* Use non-blocking connect() */ @@ -424,26 +424,27 @@ typedef struct uct_tcp_iface { * TCP interface configuration */ typedef struct uct_tcp_iface_config { - uct_iface_config_t super; - size_t tx_seg_size; - size_t rx_seg_size; - size_t max_iov; - size_t sendv_thresh; - int prefer_default; - int put_enable; - int conn_nb; - unsigned max_poll; - unsigned max_conn_retries; - int sockopt_nodelay; - uct_tcp_send_recv_buf_config_t sockopt; - unsigned syn_cnt; - uct_iface_mpool_config_t tx_mpool; - uct_iface_mpool_config_t rx_mpool; - ucs_range_spec_t port_range; + uct_iface_config_t super; + size_t tx_seg_size; + size_t rx_seg_size; + size_t max_iov; + size_t sendv_thresh; + int prefer_default; + int put_enable; + int conn_nb; + unsigned max_poll; + unsigned max_conn_retries; + int sockopt_nodelay; + uct_tcp_send_recv_buf_config_t sockopt; + unsigned syn_cnt; + uct_iface_mpool_config_t tx_mpool; + uct_iface_mpool_config_t rx_mpool; + ucs_range_spec_t port_range; + UCS_CONFIG_STRING_ARRAY_FIELD(af) af_prio; struct { - ucs_time_t idle; - unsigned long cnt; - ucs_time_t intvl; + ucs_time_t idle; + unsigned long cnt; + ucs_time_t intvl; } keepalive; } uct_tcp_iface_config_t; @@ -476,18 +477,18 @@ void uct_tcp_iface_remove_ep(uct_tcp_ep_t *ep); int uct_tcp_cm_ep_accept_conn(uct_tcp_ep_t *ep); int uct_tcp_iface_is_self_addr(uct_tcp_iface_t *iface, - const struct sockaddr_in *peer_addr); + const struct sockaddr_storage *peer_addr); ucs_status_t uct_tcp_ep_handle_io_err(uct_tcp_ep_t *ep, const char *op_str, ucs_status_t io_status); ucs_status_t uct_tcp_ep_init(uct_tcp_iface_t *iface, int fd, - const struct sockaddr_in *dest_addr, + const struct sockaddr_storage *dest_addr, uct_tcp_ep_t **ep_p); ucs_status_t uct_tcp_ep_set_dest_addr(const uct_device_addr_t *dev_addr, const uct_iface_addr_t *iface_addr, - struct sockaddr *dest_addr); + struct sockaddr_storage *dest_addr); uint64_t uct_tcp_ep_get_cm_id(const uct_tcp_ep_t *ep); @@ -582,12 +583,12 @@ void uct_tcp_cm_change_conn_state(uct_tcp_ep_t *ep, void uct_tcp_cm_ep_set_conn_sn(uct_tcp_ep_t *ep); uct_tcp_ep_t *uct_tcp_cm_get_ep(uct_tcp_iface_t *iface, - const struct sockaddr_in *dest_address, + const struct sockaddr_storage *dest_address, ucs_conn_sn_t conn_sn, uint8_t with_ctx_cap); uct_tcp_ep_t *uct_tcp_cm_get_conn_to_ep(uct_tcp_iface_t *iface, - const struct sockaddr_in *dest_address, + const struct sockaddr_storage *dest_address, ucs_conn_sn_t conn_sn, uint8_t with_ctx_cap); @@ -596,7 +597,7 @@ void uct_tcp_cm_insert_ep(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep); void uct_tcp_cm_remove_ep(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep); ucs_status_t uct_tcp_cm_handle_incoming_conn(uct_tcp_iface_t *iface, - const struct sockaddr_in *peer_addr, + const struct sockaddr_storage *peer_addr, int fd); ucs_status_t uct_tcp_cm_conn_start(uct_tcp_ep_t *ep); diff --git a/src/uct/tcp/tcp_cm.c b/src/uct/tcp/tcp_cm.c index c4d37752fae..e6dad7765e1 100644 --- a/src/uct/tcp/tcp_cm.c +++ b/src/uct/tcp/tcp_cm.c @@ -287,7 +287,7 @@ void uct_tcp_cm_ep_set_conn_sn(uct_tcp_ep_t *ep) } uct_tcp_ep_t *uct_tcp_cm_get_ep(uct_tcp_iface_t *iface, - const struct sockaddr_in *dest_address, + const struct sockaddr_storage *dest_address, ucs_conn_sn_t conn_sn, uint8_t with_ctx_cap) { @@ -502,7 +502,7 @@ uct_tcp_cm_verify_req_connected_ep(uct_tcp_ep_t *ep, /* copy iface_addr to the local variable to avoid potential unaligned access * when need to get the address of uct_tcp_cm_conn_req_pkt_t::iface_addr, * since uct_tcp_cm_conn_req_pkt_t is a packed structure */ - struct sockaddr_in pkt_addr = cm_req_pkt->iface_addr; + struct sockaddr_storage pkt_addr = cm_req_pkt->iface_addr; ucs_status_t status; return (ep->cm_id.conn_sn == cm_req_pkt->cm_id.conn_sn) && @@ -732,7 +732,7 @@ ucs_status_t uct_tcp_cm_conn_start(uct_tcp_ep_t *ep) /* This function is called from async thread */ ucs_status_t uct_tcp_cm_handle_incoming_conn(uct_tcp_iface_t *iface, - const struct sockaddr_in *peer_addr, + const struct sockaddr_storage *peer_addr, int fd) { char str_local_addr[UCS_SOCKADDR_STRING_LEN]; diff --git a/src/uct/tcp/tcp_ep.c b/src/uct/tcp/tcp_ep.c index 5aee50f018b..d833c519105 100644 --- a/src/uct/tcp/tcp_ep.c +++ b/src/uct/tcp/tcp_ep.c @@ -128,15 +128,14 @@ static inline void uct_tcp_ep_ctx_reset(uct_tcp_ep_ctx_t *ctx) uct_tcp_ep_ctx_rewind(ctx); } -static void uct_tcp_ep_addr_cleanup(struct sockaddr_in *sock_addr) +static void uct_tcp_ep_addr_cleanup(struct sockaddr_storage *sock_addr) { memset(sock_addr, 0, sizeof(*sock_addr)); } -static void uct_tcp_ep_addr_init(struct sockaddr_in *sock_addr, - const struct sockaddr_in *peer_addr) +static void uct_tcp_ep_addr_init(struct sockaddr_storage *sock_addr, + const struct sockaddr_storage *peer_addr) { - /* TODO: handle IPv4 and IPv6 */ if (peer_addr == NULL) { uct_tcp_ep_addr_cleanup(sock_addr); } else { @@ -244,7 +243,7 @@ uct_tcp_ep_t *uct_tcp_ep_ptr_map_retrieve(uct_tcp_iface_t *iface, } static UCS_CLASS_INIT_FUNC(uct_tcp_ep_t, uct_tcp_iface_t *iface, - int fd, const struct sockaddr_in *dest_addr) + int fd, const struct sockaddr_storage *dest_addr) { UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super) @@ -427,7 +426,7 @@ UCS_CLASS_DEFINE(uct_tcp_ep_t, uct_base_ep_t); UCS_CLASS_DEFINE_NAMED_NEW_FUNC(uct_tcp_ep_init, uct_tcp_ep_t, uct_tcp_ep_t, uct_tcp_iface_t*, int, - const struct sockaddr_in*) + const struct sockaddr_storage*) UCS_CLASS_DEFINE_NAMED_DELETE_FUNC(uct_tcp_ep_destroy_internal, uct_tcp_ep_t, uct_ep_t) @@ -577,11 +576,12 @@ static ucs_status_t uct_tcp_ep_keepalive_enable(uct_tcp_ep_t *ep) static ucs_status_t uct_tcp_ep_create_socket_and_connect(uct_tcp_ep_t *ep) { - uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, - uct_tcp_iface_t); + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + const struct sockaddr *peer_addr = (struct sockaddr*)&ep->peer_addr; ucs_status_t status; - status = ucs_socket_create(AF_INET, SOCK_STREAM, &ep->fd); + status = ucs_socket_create(peer_addr->sa_family, SOCK_STREAM, &ep->fd); if (status != UCS_OK) { goto err; } @@ -728,7 +728,7 @@ static ucs_status_t uct_tcp_ep_connect(uct_tcp_ep_t *ep) ucs_status_t uct_tcp_ep_set_dest_addr(const uct_device_addr_t *dev_addr, const uct_iface_addr_t *iface_addr, - struct sockaddr *dest_addr) + struct sockaddr_storage *dest_addr) { uct_tcp_device_addr_t *tcp_dev_addr = (uct_tcp_device_addr_t*)dev_addr; uct_tcp_iface_addr_t *tcp_iface_addr = (uct_tcp_iface_addr_t*)iface_addr; @@ -746,16 +746,15 @@ ucs_status_t uct_tcp_ep_set_dest_addr(const uct_device_addr_t *dev_addr, in_addr = tcp_dev_addr + 1; } - /* TODO: handle AF_INET6 */ - dest_addr->sa_family = tcp_dev_addr->sa_family; - ucs_assert(dest_addr->sa_family == AF_INET); + dest_addr->ss_family = tcp_dev_addr->sa_family; - status = ucs_sockaddr_set_inet_addr(dest_addr, in_addr); + status = ucs_sockaddr_set_inet_addr((struct sockaddr*)dest_addr, in_addr); if (status != UCS_OK) { return status; } - return ucs_sockaddr_set_port(dest_addr, ntohs(tcp_iface_addr->port)); + return ucs_sockaddr_set_port((struct sockaddr*)dest_addr, + ntohs(tcp_iface_addr->port)); } uint64_t uct_tcp_ep_get_cm_id(const uct_tcp_ep_t *ep) @@ -766,18 +765,18 @@ uint64_t uct_tcp_ep_get_cm_id(const uct_tcp_ep_t *ep) ucs_status_t uct_tcp_ep_create(const uct_ep_params_t *params, uct_ep_h *ep_p) { - uct_tcp_iface_t *iface = ucs_derived_of(params->iface, - uct_tcp_iface_t); - uct_tcp_ep_t *ep = NULL; - struct sockaddr_in *ep_dest_addr = NULL; - struct sockaddr_in dest_addr; + uct_tcp_iface_t *iface = ucs_derived_of(params->iface, + uct_tcp_iface_t); + uct_tcp_ep_t *ep = NULL; + struct sockaddr_storage *ep_dest_addr = NULL; + struct sockaddr_storage dest_addr; ucs_status_t status; if (ucs_test_all_flags(params->field_mask, UCT_EP_PARAM_FIELD_DEV_ADDR | UCT_EP_PARAM_FIELD_IFACE_ADDR)) { status = uct_tcp_ep_set_dest_addr(params->dev_addr, params->iface_addr, - (struct sockaddr*)&dest_addr); + &dest_addr); if (status != UCS_OK) { return status; } @@ -851,7 +850,7 @@ ucs_status_t uct_tcp_ep_connect_to_ep(uct_ep_h tl_ep, status = uct_tcp_ep_set_dest_addr(dev_addr, (uct_iface_addr_t*)&addr->iface_addr, - (struct sockaddr*)&ep->peer_addr); + &ep->peer_addr); if (status != UCS_OK) { return status; } diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c index 0ecd7b5840f..820b0773acd 100644 --- a/src/uct/tcp/tcp_iface.c +++ b/src/uct/tcp/tcp_iface.c @@ -86,6 +86,10 @@ static ucs_config_field_t uct_tcp_iface_config_table[] = { "let the operating system select the port number.", ucs_offsetof(uct_tcp_iface_config_t, port_range), UCS_CONFIG_TYPE_RANGE_SPEC}, + {"AF_PRIO", "inet,inet6", + "Priority of address types used for socket connections", + ucs_offsetof(uct_tcp_iface_config_t, af_prio), UCS_CONFIG_TYPE_STRING_ARRAY}, + #ifdef UCT_TCP_EP_KEEPALIVE {"KEEPIDLE", UCS_PP_MAKE_STRING(UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE) "s", "The time the connection needs to remain idle before TCP starts sending " @@ -124,7 +128,7 @@ static ucs_status_t uct_tcp_iface_get_device_address(uct_iface_h tl_iface, ucs_status_t status; dev_addr->flags = 0; - dev_addr->sa_family = iface->config.ifaddr.sin_family; + dev_addr->sa_family = saddr->sa_family; if (ucs_sockaddr_is_inaddr_loopback(saddr)) { dev_addr->flags |= UCT_TCP_DEVICE_ADDR_FLAG_LOOPBACK; @@ -167,8 +171,14 @@ uct_tcp_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *addr) uct_tcp_iface_t *iface = ucs_derived_of(tl_iface, uct_tcp_iface_t); uct_tcp_iface_addr_t *iface_addr = (uct_tcp_iface_addr_t*)addr; + ucs_status_t status; + uint16_t port; + + status = ucs_sockaddr_get_port((struct sockaddr *)&iface->config.ifaddr, + &port); + ucs_assert_always(status == UCS_OK); + iface_addr->port = htons(port); - iface_addr->port = iface->config.ifaddr.sin_port; return UCS_OK; } @@ -339,7 +349,7 @@ uct_tcp_iface_connect_handler(int listen_fd, ucs_event_set_types_t events, void *arg) { uct_tcp_iface_t *iface = arg; - struct sockaddr_in peer_addr; + struct sockaddr_storage peer_addr; socklen_t addrlen; ucs_status_t status; int fd; @@ -425,10 +435,11 @@ static uct_iface_ops_t uct_tcp_iface_ops = { static ucs_status_t uct_tcp_iface_server_init(uct_tcp_iface_t *iface) { - struct sockaddr_in bind_addr = iface->config.ifaddr; - unsigned port_range_start = iface->port_range.first; - unsigned port_range_end = iface->port_range.last; + struct sockaddr_storage bind_addr = iface->config.ifaddr; + unsigned port_range_start = iface->port_range.first; + unsigned port_range_end = iface->port_range.last; ucs_status_t status; + size_t addr_len; int port, retry; /* retry is 1 for a range of ports or when port value is zero. @@ -445,13 +456,18 @@ static ucs_status_t uct_tcp_iface_server_init(uct_tcp_iface_t *iface) port = 0; /* let the operating system choose the port */ } - status = ucs_sockaddr_set_port((struct sockaddr *)&bind_addr, port); + status = ucs_sockaddr_set_port((struct sockaddr*)&bind_addr, port); if (status != UCS_OK) { break; } - status = ucs_socket_server_init((struct sockaddr *)&bind_addr, - sizeof(bind_addr), ucs_socket_max_conn(), + status = ucs_sockaddr_sizeof((struct sockaddr*)&bind_addr, &addr_len); + if (status != UCS_OK) { + return status; + } + + status = ucs_socket_server_init((struct sockaddr*)&bind_addr, + addr_len, ucs_socket_max_conn(), retry, 0, &iface->listen_fd); } while (retry && (status == UCS_ERR_BUSY)); @@ -460,10 +476,11 @@ static ucs_status_t uct_tcp_iface_server_init(uct_tcp_iface_t *iface) static ucs_status_t uct_tcp_iface_listener_init(uct_tcp_iface_t *iface) { - struct sockaddr_in bind_addr = iface->config.ifaddr; - socklen_t socklen = sizeof(bind_addr); + struct sockaddr_storage bind_addr = iface->config.ifaddr; + socklen_t socklen = sizeof(bind_addr); char ip_port_str[UCS_SOCKADDR_STRING_LEN]; ucs_status_t status; + uint16_t port; int ret; status = uct_tcp_iface_server_init(iface); @@ -472,14 +489,22 @@ static ucs_status_t uct_tcp_iface_listener_init(uct_tcp_iface_t *iface) } /* Get the port which was selected for the socket */ - ret = getsockname(iface->listen_fd, (struct sockaddr *)&bind_addr, &socklen); + ret = getsockname(iface->listen_fd, (struct sockaddr*)&bind_addr, &socklen); if (ret < 0) { ucs_error("getsockname(fd=%d) failed: %m", iface->listen_fd); status = UCS_ERR_IO_ERROR; goto err_close_sock; } - iface->config.ifaddr.sin_port = bind_addr.sin_port; + status = ucs_sockaddr_get_port((struct sockaddr*)&bind_addr, &port); + if (status != UCS_OK) { + goto err_close_sock; + } + + status = ucs_sockaddr_set_port((struct sockaddr*)&iface->config.ifaddr, port); + if (status != UCS_OK) { + goto err_close_sock; + } /* Register event handler for incoming connections */ status = ucs_async_set_event_handler(iface->super.worker->async->mode, @@ -564,6 +589,8 @@ static UCS_CLASS_INIT_FUNC(uct_tcp_iface_t, uct_md_h md, uct_worker_h worker, uct_tcp_iface_config_t *config = ucs_derived_of(tl_config, uct_tcp_iface_config_t); ucs_status_t status; + sa_family_t af; + int i; UCT_CHECK_PARAM(params->field_mask & UCT_IFACE_PARAM_FIELD_OPEN_MODE, "UCT_IFACE_PARAM_FIELD_OPEN_MODE is not defined"); @@ -689,12 +716,33 @@ static UCS_CLASS_INIT_FUNC(uct_tcp_iface_t, uct_md_h md, uct_worker_h worker, goto err_cleanup_tx_mpool; } - status = ucs_sockaddr_get_ifaddr(self->if_name, &self->config.ifaddr); - if (status != UCS_OK) { - goto err_cleanup_rx_mpool; + ucs_assert_always(config->af_prio.count > 0); + + for (i = 0; i < config->af_prio.count; i++) { + if (!strcasecmp(config->af_prio.af[i], "inet")) { + af = AF_INET; + } else if (!strcasecmp(config->af_prio.af[i], "inet6")) { + af = AF_INET6; + } else { + ucs_error("invalid address family:%s", config->af_prio.af[i]); + goto err_cleanup_tx_mpool; + } + + status = ucs_sockaddr_get_ifaddr(self->if_name, af, (struct sockaddr *) + &self->config.ifaddr); + if (status != UCS_OK) { + continue; + } + + status = ucs_sockaddr_get_ifmask(self->if_name, af, (struct sockaddr *) + &self->config.netmask); + if (status != UCS_OK) { + continue; + } + + break; } - status = ucs_sockaddr_get_ifmask(self->if_name, &self->config.netmask); if (status != UCS_OK) { goto err_cleanup_rx_mpool; } @@ -752,7 +800,7 @@ void uct_tcp_iface_remove_ep(uct_tcp_ep_t *ep) } int uct_tcp_iface_is_self_addr(uct_tcp_iface_t *iface, - const struct sockaddr_in *peer_addr) + const struct sockaddr_storage *peer_addr) { ucs_status_t status; int cmp; diff --git a/test/gtest/uct/tcp/test_tcp.cc b/test/gtest/uct/tcp/test_tcp.cc index c9b77f50875..ea96471e42b 100644 --- a/test/gtest/uct/tcp/test_tcp.cc +++ b/test/gtest/uct/tcp/test_tcp.cc @@ -179,14 +179,14 @@ class test_uct_tcp : public uct_test { status = uct_iface_get_address(to.iface(), iface_addr); ASSERT_UCS_OK(status); - struct sockaddr dest_addr; + struct sockaddr_storage dest_addr; uct_tcp_ep_set_dest_addr(dev_addr, iface_addr, &dest_addr); int fd; status = ucs_socket_create(AF_INET, SOCK_STREAM, &fd); ASSERT_UCS_OK(status); - status = ucs_socket_connect(fd, &dest_addr); + status = ucs_socket_connect(fd, (struct sockaddr*)&dest_addr); ASSERT_UCS_OK(status); status = ucs_sys_fcntl_modfl(fd, O_NONBLOCK, 0);