svn commit: r361275 - in head/sys: conf dev/hyperv/hvsock dev/hyperv/include dev/hyperv/vmbus modules/hyperv modules/hyperv/hvsock sys
Wei Hu
whu at FreeBSD.org
Wed May 20 11:04:00 UTC 2020
Author: whu
Date: Wed May 20 11:03:59 2020
New Revision: 361275
URL: https://svnweb.freebsd.org/changeset/base/361275
Log:
HyperV socket implementation for FreeBSD
This change adds Hyper-V socket feature in FreeBSD. New socket address
family AF_HYPERV and its kernel support are added.
Submitted by: Wei Hu <weh at microsoft.com>
Reviewed by: Dexuan Cui <decui at microsoft.com>
Relnotes: yes
Sponsored by: Microsoft
Differential Revision: https://reviews.freebsd.org/D24061
Added:
head/sys/dev/hyperv/hvsock/
head/sys/dev/hyperv/hvsock/hv_sock.c (contents, props changed)
head/sys/dev/hyperv/hvsock/hv_sock.h (contents, props changed)
head/sys/modules/hyperv/hvsock/
head/sys/modules/hyperv/hvsock/Makefile (contents, props changed)
Modified:
head/sys/conf/files.x86
head/sys/dev/hyperv/include/vmbus.h
head/sys/dev/hyperv/vmbus/vmbus.c
head/sys/dev/hyperv/vmbus/vmbus_br.c
head/sys/dev/hyperv/vmbus/vmbus_brvar.h
head/sys/dev/hyperv/vmbus/vmbus_chan.c
head/sys/dev/hyperv/vmbus/vmbus_chanvar.h
head/sys/dev/hyperv/vmbus/vmbus_reg.h
head/sys/modules/hyperv/Makefile
head/sys/sys/socket.h
Modified: head/sys/conf/files.x86
==============================================================================
--- head/sys/conf/files.x86 Wed May 20 11:01:10 2020 (r361274)
+++ head/sys/conf/files.x86 Wed May 20 11:03:59 2020 (r361275)
@@ -133,6 +133,7 @@ dev/hwpmc/hwpmc_core.c optional hwpmc
dev/hwpmc/hwpmc_uncore.c optional hwpmc
dev/hwpmc/hwpmc_tsc.c optional hwpmc
dev/hwpmc/hwpmc_x86.c optional hwpmc
+dev/hyperv/hvsock/hv_sock.c optional hyperv
dev/hyperv/input/hv_kbd.c optional hyperv
dev/hyperv/input/hv_kbdc.c optional hyperv
dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci
Added: head/sys/dev/hyperv/hvsock/hv_sock.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sys/dev/hyperv/hvsock/hv_sock.c Wed May 20 11:03:59 2020 (r361275)
@@ -0,0 +1,1748 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/domain.h>
+#include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/sockbuf.h>
+#include <sys/sx.h>
+#include <sys/uio.h>
+
+#include <net/vnet.h>
+
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+
+#include "hv_sock.h"
+
+/* Debug verbosity levels; compared against the hvs_dbg_level tunable. */
+#define HVSOCK_DBG_NONE 0x0
+#define HVSOCK_DBG_INFO 0x1
+#define HVSOCK_DBG_ERR 0x2
+#define HVSOCK_DBG_VERBOSE 0x3
+
+
+SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
+
+/* Current debug level, exported under the hvsock sysctl node above. */
+static int hvs_dbg_level;
+SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
+ 0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
+
+
+/*
+ * Print a message when hvs_dbg_level is at least `level'.  Because the
+ * comparison is ">=", a higher setting also prints every lower level.
+ * The printf arguments are only evaluated when the message is printed.
+ */
+#define HVSOCK_DBG(level, ...) do { \
+ if (hvs_dbg_level >= (level)) \
+ printf(__VA_ARGS__); \
+ } while (0)
+
+/* malloc(9) type used for the hvs_pcb allocations in this file. */
+MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
+
+/* The MTU is 16KB per host side's design */
+#define HVSOCK_MTU_SIZE (1024 * 16)
+/* Largest payload handed to one send: a page minus the vmpipe header. */
+#define HVSOCK_SEND_BUF_SZ (PAGE_SIZE - sizeof(struct vmpipe_proto_header))
+
+#define HVSOCK_HEADER_LEN (sizeof(struct hvs_pkt_header))
+
+/*
+ * Length of a packet carrying `payload_len' bytes: the packet header, the
+ * payload rounded up to a multiple of 8, plus one trailing uint64_t.
+ */
+#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \
+ roundup2(payload_len, 8) + \
+ sizeof(uint64_t))
+
+
+/*
+ * Forward declaration: the protosw table below points back at the domain,
+ * which is itself defined after the table.
+ */
+static struct domain hv_socket_domain;
+
+/*
+ * HyperV Transport sockets
+ *
+ * User-request dispatch table; every handler is one of the hvs_trans_*
+ * functions.
+ */
+static struct pr_usrreqs hvs_trans_usrreqs = {
+ .pru_attach = hvs_trans_attach,
+ .pru_bind = hvs_trans_bind,
+ .pru_listen = hvs_trans_listen,
+ .pru_accept = hvs_trans_accept,
+ .pru_connect = hvs_trans_connect,
+ .pru_peeraddr = hvs_trans_peeraddr,
+ .pru_sockaddr = hvs_trans_sockaddr,
+ .pru_soreceive = hvs_trans_soreceive,
+ .pru_sosend = hvs_trans_sosend,
+ .pru_disconnect = hvs_trans_disconnect,
+ .pru_close = hvs_trans_close,
+ .pru_detach = hvs_trans_detach,
+ .pru_shutdown = hvs_trans_shutdown,
+ .pru_abort = hvs_trans_abort,
+};
+
+/*
+ * Definitions of protocols supported in HyperV socket domain
+ *
+ * Only one entry: a connection-oriented SOCK_STREAM transport.
+ */
+static struct protosw hv_socket_protosw[] = {
+{
+ .pr_type = SOCK_STREAM,
+ .pr_domain = &hv_socket_domain,
+ .pr_protocol = HYPERV_SOCK_PROTO_TRANS,
+ .pr_flags = PR_CONNREQUIRED,
+ .pr_init = hvs_trans_init,
+ .pr_usrreqs = &hvs_trans_usrreqs,
+},
+};
+
+/* The AF_HYPERV domain, covering the whole protosw table above. */
+static struct domain hv_socket_domain = {
+ .dom_family = AF_HYPERV,
+ .dom_name = "hyperv",
+ .dom_protosw = hv_socket_protosw,
+ .dom_protoswNPROTOSW = &hv_socket_protosw[nitems(hv_socket_protosw)]
+};
+
+/*
+ * NOTE(review): presumably registers hv_socket_domain with the network
+ * stack -- confirm against the VNET_DOMAIN_SET definition.
+ */
+VNET_DOMAIN_SET(hv_socket_);
+
+/* Upper and lower bounds used by the auto-bind port search. */
+#define MAX_PORT ((uint32_t)0xFFFFFFFF)
+#define MIN_PORT ((uint32_t)0x0)
+
+/*
+ * Service ID template.  The leading unsigned int of a service ID holds the
+ * port number (see get_port_by_srv_id/set_port_by_srv_id); the remaining
+ * bytes must match this template (see is_valid_srv_id).
+ */
+/* 00000000-facb-11e6-bd58-64006a7986d3 */
+static const struct hyperv_guid srv_id_template = {
+ .hv_guid = {
+ 0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
+ 0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
+};
+
+/* Forward declarations of helpers used by the send/receive paths. */
+static int hvsock_br_callback(void *, int, void *);
+static uint32_t hvsock_canread_check(struct hvs_pcb *);
+static uint32_t hvsock_canwrite_check(struct hvs_pcb *);
+static int hvsock_send_data(struct vmbus_channel *chan,
+ struct uio *uio, uint32_t to_write, struct sockbuf *sb);
+
+
+
+/* Globals */
+/* Exclusive lock taken by hvs_trans_lock()/released by hvs_trans_unlock(). */
+static struct sx hvs_trans_socks_sx;
+/* Mutex held around every access to the two socket lists below. */
+static struct mtx hvs_trans_socks_mtx;
+static LIST_HEAD(, hvs_pcb) hvs_trans_bound_socks;
+static LIST_HEAD(, hvs_pcb) hvs_trans_connected_socks;
+/* Last local port picked by the auto-bind search in hvs_trans_connect(). */
+static uint32_t previous_auto_bound_port;
+
+/*
+ * Log a GUID at HVSOCK_DBG_INFO level: the leading unsigned int, the two
+ * unsigned shorts that follow it, then the last eight bytes one by one,
+ * all in hex.
+ */
+static void
+hvsock_print_guid(struct hyperv_guid *guid)
+{
+	unsigned char *raw = (unsigned char *)guid;
+	unsigned char *tail = &raw[8];
+
+	HVSOCK_DBG(HVSOCK_DBG_INFO,
+	    "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
+	    *(unsigned int *)raw,
+	    *(unsigned short *)(raw + 4),
+	    *(unsigned short *)(raw + 6),
+	    tail[0], tail[1], tail[2], tail[3],
+	    tail[4], tail[5], tail[6], tail[7]);
+}
+
+/*
+ * A service ID is valid when everything after its first four bytes (the
+ * port field) matches srv_id_template.
+ */
+static bool
+is_valid_srv_id(const struct hyperv_guid *id)
+{
+	const size_t port_bytes = 4;
+	int diff;
+
+	diff = memcmp(id->hv_guid + port_bytes,
+	    srv_id_template.hv_guid + port_bytes,
+	    sizeof(struct hyperv_guid) - port_bytes);
+
+	return (diff == 0);
+}
+
+/* Read the port number stored in the leading unsigned int of a service ID. */
+static unsigned int
+get_port_by_srv_id(const struct hyperv_guid *srv_id)
+{
+	const unsigned int *port_field = (const unsigned int *)srv_id;
+
+	return (*port_field);
+}
+
+/* Store `port' in the leading unsigned int of a service ID. */
+static void
+set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
+{
+	unsigned int *port_field = (unsigned int *)srv_id;
+
+	*port_field = port;
+}
+
+
+/*
+ * Unlink `pcb' from the bound and/or connected socket list, as selected by
+ * the HVS_LIST_* bits in `list'.  Each list is searched first, so a pcb that
+ * is not on a list is left untouched.  A NULL pcb is ignored.
+ *
+ * No locking is done here; the callers in this file hold
+ * hvs_trans_socks_mtx around the call.
+ */
+static void
+__hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
+{
+	struct hvs_pcb *p = NULL;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
+
+	if (!pcb)
+		return;
+
+	if (list & HVS_LIST_BOUND) {
+		LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) {
+			if (p == pcb) {
+				LIST_REMOVE(p, bound_next);
+				/*
+				 * Stop here: LIST_FOREACH would otherwise
+				 * advance through the element that was just
+				 * unlinked.
+				 */
+				break;
+			}
+		}
+	}
+
+	if (list & HVS_LIST_CONNECTED) {
+		LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) {
+			if (p == pcb) {
+				LIST_REMOVE(pcb, connected_next);
+				/* Same reason as above. */
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Socket flavour of __hvs_remove_pcb_from_list(): look up the socket's pcb
+ * and unlink it from the selected list(s).
+ */
+static void
+__hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+	struct hvs_pcb *sock_pcb;
+
+	sock_pcb = so2hvspcb(so);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, sock_pcb);
+
+	__hvs_remove_pcb_from_list(sock_pcb, list);
+}
+
+/*
+ * Put the socket's pcb at the head of the bound and/or connected list, as
+ * selected by the HVS_LIST_* bits in `list'.  Performs no locking itself.
+ */
+static void
+__hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+	struct hvs_pcb *sock_pcb = so2hvspcb(so);
+
+	if ((list & HVS_LIST_BOUND) != 0) {
+		LIST_INSERT_HEAD(&hvs_trans_bound_socks, sock_pcb,
+		    bound_next);
+	}
+
+	if ((list & HVS_LIST_CONNECTED) != 0) {
+		LIST_INSERT_HEAD(&hvs_trans_connected_socks, sock_pcb,
+		    connected_next);
+	}
+}
+
+/*
+ * Locked wrapper around __hvs_remove_socket_from_list().  Does nothing
+ * (beyond a verbose log line) when the socket or its pcb is NULL.
+ */
+void
+hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+	if (so == NULL || so->so_pcb == NULL) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: socket or so_pcb is null\n", __func__);
+	} else {
+		mtx_lock(&hvs_trans_socks_mtx);
+		__hvs_remove_socket_from_list(so, list);
+		mtx_unlock(&hvs_trans_socks_mtx);
+	}
+}
+
+/*
+ * Locked wrapper around __hvs_insert_socket_on_list().  Does nothing
+ * (beyond a verbose log line) when the socket or its pcb is NULL.
+ */
+static void
+hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+	if (so == NULL || so->so_pcb == NULL) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: socket or so_pcb is null\n", __func__);
+	} else {
+		mtx_lock(&hvs_trans_socks_mtx);
+		__hvs_insert_socket_on_list(so, list);
+		mtx_unlock(&hvs_trans_socks_mtx);
+	}
+}
+
+/*
+ * Search the selected list(s) for a pcb whose local port equals
+ * addr->hvs_port and which still has a socket attached.  The bound list is
+ * searched before the connected list.  Returns that socket, or NULL when
+ * there is no match.  Performs no locking itself.
+ */
+static struct socket *
+__hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+	struct hvs_pcb *cur = NULL;
+
+	if ((list & HVS_LIST_BOUND) != 0) {
+		LIST_FOREACH(cur, &hvs_trans_bound_socks, bound_next) {
+			if (cur->so == NULL)
+				continue;
+			if (cur->local_addr.hvs_port == addr->hvs_port)
+				return (cur->so);
+		}
+	}
+
+	if ((list & HVS_LIST_CONNECTED) != 0) {
+		LIST_FOREACH(cur, &hvs_trans_connected_socks, connected_next) {
+			if (cur->so == NULL)
+				continue;
+			if (cur->local_addr.hvs_port == addr->hvs_port)
+				return (cur->so);
+		}
+	}
+
+	return (NULL);
+}
+
+/* Locked wrapper around __hvs_find_socket_on_list(). */
+static struct socket *
+hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+	struct socket *match;
+
+	mtx_lock(&hvs_trans_socks_mtx);
+	match = __hvs_find_socket_on_list(addr, list);
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	return (match);
+}
+
+/* Reset `addr' to an all-zero AF_HYPERV address carrying `port'. */
+static inline void
+hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
+{
+	memset(addr, 0, sizeof(*addr));
+	addr->hvs_port = port;
+	addr->sa_family = AF_HYPERV;
+}
+
+/* Initialize `addr' from the port number embedded in a service ID. */
+void
+hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
+{
+	unsigned int port = get_port_by_srv_id(svr_id);
+
+	hvs_addr_set(addr, port);
+}
+
+/* Take hvs_trans_socks_sx exclusively.  Always returns 0. */
+int
+hvs_trans_lock(void)
+{
+	int rc = 0;
+
+	sx_xlock(&hvs_trans_socks_sx);
+
+	return (rc);
+}
+
+/* Release the exclusive hold on hvs_trans_socks_sx. */
+void
+hvs_trans_unlock(void)
+{
+
+	sx_xunlock(&hvs_trans_socks_sx);
+}
+
+/*
+ * Protocol init hook (pr_init).  Sets up the file-level locks, the two
+ * socket lists and the auto-bind port cursor.
+ */
+void
+hvs_trans_init(void)
+{
+	/*
+	 * Globals are only set up for the default vnet instance, and only
+	 * when running as a Hyper-V guest.
+	 */
+	if (!IS_DEFAULT_VNET(curvnet) || vm_guest != VM_GUEST_HV)
+		return;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_init called\n", __func__);
+
+	/* Locks first, then the data they protect. */
+	sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
+	mtx_init(&hvs_trans_socks_mtx, "hvs_trans_socks_mtx", NULL, MTX_DEF);
+
+	LIST_INIT(&hvs_trans_bound_socks);
+	LIST_INIT(&hvs_trans_connected_socks);
+	previous_auto_bound_port = MAX_PORT;
+}
+
+/*
+ * Allocate a zeroed pcb and tie it to the socket.
+ *
+ * Called in two cases:
+ * 1) When user calls socket();
+ * 2) When we accept a new incoming connection and call sonewconn().
+ *
+ * Returns ESOCKTNOSUPPORT for a non-stream socket, EPROTONOSUPPORT for an
+ * unknown protocol, EISCONN when a pcb is already attached, ENOMEM when
+ * the allocation fails, and 0 on success.
+ */
+int
+hvs_trans_attach(struct socket *so, int proto, struct thread *td)
+{
+	struct hvs_pcb *new_pcb;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_attach called\n", __func__);
+
+	if (so->so_type != SOCK_STREAM)
+		return (ESOCKTNOSUPPORT);
+
+	/* Protocol 0 selects the only transport this domain has. */
+	if (!(proto == 0 || proto == HYPERV_SOCK_PROTO_TRANS))
+		return (EPROTONOSUPPORT);
+
+	if (so2hvspcb(so) != NULL)
+		return (EISCONN);
+
+	new_pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
+	if (new_pcb == NULL)
+		return (ENOMEM);
+
+	new_pcb->so = so;
+	so->so_pcb = (void *)new_pcb;
+
+	return (0);
+}
+
+/*
+ * Drop the socket's reference to its pcb.  The pcb itself is wiped and
+ * freed here only for a listening socket.
+ * NOTE(review): for other sockets the pcb is presumably released on another
+ * path not visible here -- confirm.
+ */
+void
+hvs_trans_detach(struct socket *so)
+{
+	struct hvs_pcb *sock_pcb;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_detach called\n", __func__);
+
+	(void) hvs_trans_lock();
+
+	sock_pcb = so2hvspcb(so);
+	if (sock_pcb != NULL) {
+		if (SOLISTENING(so)) {
+			bzero(sock_pcb, sizeof(*sock_pcb));
+			free(sock_pcb, M_HVSOCK);
+		}
+
+		so->so_pcb = NULL;
+	}
+
+	hvs_trans_unlock();
+}
+
+/*
+ * Bind the socket to the port in `addr'.
+ *
+ * Returns EINVAL when the address or the pcb is missing, EAFNOSUPPORT for
+ * a non-AF_HYPERV address, EADDRINUSE when a socket on the bound or
+ * connected list already uses the port, and 0 on success (the socket is
+ * then on the bound list).
+ */
+int
+hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
+	struct socket *holder;
+	int error;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_bind called\n", __func__);
+
+	if (sa == NULL || pcb == NULL)
+		return (EINVAL);
+
+	if (sa->sa_family != AF_HYPERV) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: Not supported, sa_family is %u\n",
+		    __func__, sa->sa_family);
+		return (EAFNOSUPPORT);
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: binding port = 0x%x\n", __func__, sa->hvs_port);
+
+	/* Lookup and insertion happen under one hold of the list mutex. */
+	mtx_lock(&hvs_trans_socks_mtx);
+	holder = __hvs_find_socket_on_list(sa,
+	    HVS_LIST_BOUND | HVS_LIST_CONNECTED);
+	if (holder == NULL) {
+		/*
+		 * The port is free: record the local address, leave the
+		 * remote side unspecified, and publish on the bound list.
+		 */
+		hvs_addr_set(&pcb->local_addr, sa->hvs_port);
+		hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
+		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+		error = 0;
+	} else {
+		error = EADDRINUSE;
+	}
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	return (error);
+}
+
+/*
+ * Put a bound socket into the listening state.
+ *
+ * Returns EINVAL without a pcb, EADDRNOTAVAIL when the socket's local
+ * address is not on the bound list or belongs to a different socket, and
+ * otherwise the result of solisten_proto_check().
+ */
+int
+hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct socket *owner;
+	int error;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_listen called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	/* The local address must be bound, and bound by this very socket. */
+	owner = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
+	if (!(owner != NULL && owner == so)) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: Address not bound or not by us.\n", __func__);
+		return (EADDRNOTAVAIL);
+	}
+
+	SOCK_LOCK(so);
+	error = solisten_proto_check(so);
+	if (error == 0) {
+		solisten_proto(so, backlog);
+	}
+	SOCK_UNLOCK(so);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket listen error = %d\n", __func__, error);
+
+	return (error);
+}
+
+/*
+ * Hand back a freshly allocated copy of the peer address in *nam.
+ * Returns EINVAL without a pcb, ENOMEM when the copy cannot be allocated.
+ */
+int
+hvs_trans_accept(struct socket *so, struct sockaddr **nam)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr *copy;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_accept called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	copy = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT);
+	*nam = copy;
+	if (copy == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Start a connection to the host service whose port is given in `nam'.
+ *
+ * A free local port is auto-bound by walking downwards from the port used
+ * last time (port MIN_PORT is never handed out), the VM and host service
+ * IDs are derived from the local and remote ports, the socket is marked
+ * connecting, and the request is passed to vmbus_req_tl_connect() once the
+ * list mutex has been dropped.
+ *
+ * Returns EINVAL when the pcb or address is missing, EAFNOSUPPORT for a
+ * non-AF_HYPERV address, EINPROGRESS when the socket is already connected,
+ * connecting or disconnecting, EADDRINUSE when no local port could be
+ * found, and 0 once the request has been issued.
+ */
+int
+hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
+	bool found_auto_bound_port = false;
+	uint32_t i;
+	int error = 0;
+
+	/* raddr has not been validated yet; do not dereference a NULL. */
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
+	    __func__, raddr != NULL ? raddr->hvs_port : 0);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	/* Verify the remote address */
+	if (raddr == NULL)
+		return (EINVAL);
+	if (raddr->sa_family != AF_HYPERV)
+		return (EAFNOSUPPORT);
+
+	mtx_lock(&hvs_trans_socks_mtx);
+	if (so->so_state &
+	    (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: socket connect in progress\n",
+		    __func__);
+		error = EINPROGRESS;
+		goto out;
+	}
+
+	/*
+	 * Find an available port for us to auto bind the local
+	 * address.
+	 */
+	hvs_addr_set(&pcb->local_addr, 0);
+
+	/*
+	 * The cursor is unsigned like the ports themselves, so wrapping
+	 * below MIN_PORT is well defined.
+	 */
+	for (i = previous_auto_bound_port - 1;
+	    i != previous_auto_bound_port; i--) {
+		if (i == MIN_PORT)
+			i = MAX_PORT;
+
+		pcb->local_addr.hvs_port = i;
+
+		if (__hvs_find_socket_on_list(&pcb->local_addr,
+		    HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
+			found_auto_bound_port = true;
+			previous_auto_bound_port = i;
+			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+			    "%s: found local bound port is %x\n",
+			    __func__, pcb->local_addr.hvs_port);
+			break;
+		}
+	}
+
+	if (found_auto_bound_port == true) {
+		/* Found available port for auto bound, put on list */
+		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+		/* Set VM service ID */
+		pcb->vm_srv_id = srv_id_template;
+		set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
+		/* Set host service ID and remote port */
+		pcb->host_srv_id = srv_id_template;
+		set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
+		hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
+
+		/* Change the socket state to SS_ISCONNECTING */
+		soisconnecting(so);
+	} else {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: No local port available for auto bound\n",
+		    __func__);
+		error = EADDRINUSE;
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
+	hvsock_print_guid(&pcb->vm_srv_id);
+	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
+	hvsock_print_guid(&pcb->host_srv_id);
+
+out:
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	/* Issue the connect request only after the list mutex is released. */
+	if (found_auto_bound_port == true)
+		vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
+
+	return (error);
+}
+
+/*
+ * Mark the socket as disconnecting unless it is already disconnected.
+ * Returns EINVAL when the socket has no pcb, 0 otherwise.
+ */
+int
+hvs_trans_disconnect(struct socket *so)
+{
+	struct hvs_pcb *sock_pcb;
+	int error = 0;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
+
+	(void) hvs_trans_lock();
+
+	sock_pcb = so2hvspcb(so);
+	if (sock_pcb == NULL) {
+		error = EINVAL;
+	} else if (!(so->so_state & SS_ISDISCONNECTED)) {
+		/* Nothing to do for a socket that is already disconnected. */
+		soisdisconnecting(so);
+	}
+
+	hvs_trans_unlock();
+
+	return (error);
+}
+
+/* sblock() flag for a request: wait for the lock unless MSG_DONTWAIT is set. */
+#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
+/*
+ * Context handed to hvsock_br_callback() by the receive path: the
+ * caller's uio and the socket's receive sockbuf.
+ */
+struct hvs_callback_arg {
+ struct uio *uio;
+ struct sockbuf *sb;
+};
+
+/*
+ * Receive handler (pru_soreceive) for the stream socket.
+ *
+ * Data is pulled from the channel into `uio' through
+ * vmbus_chan_recv_peek_call() with hvsock_br_callback() as the copy
+ * routine; pcb->recv_data_len/recv_data_off track how much of the current
+ * packet has been consumed.  `paddr', `mp0' and `controlp' are not used.
+ *
+ * Returns EINVAL for a non-stream socket, a missing pcb, an empty uio or a
+ * uio that is not a read; EOPNOTSUPP for MSG_PEEK; EPIPE when the socket
+ * is already disconnected and cannot receive more; EAGAIN for a
+ * non-blocking read that found no data; otherwise the error from sblock(),
+ * sbwait(), the channel read or so_error, or 0.
+ */
+int
+hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
+ struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+ struct sockbuf *sb;
+ ssize_t orig_resid;
+ uint32_t canread, to_read;
+ int flags, error = 0;
+ struct hvs_callback_arg cbarg;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
+
+ if (so->so_type != SOCK_STREAM)
+ return (EINVAL);
+ if (pcb == NULL)
+ return (EINVAL);
+
+ if (flagsp != NULL)
+ flags = *flagsp &~ MSG_EOR;
+ else
+ flags = 0;
+
+ if (flags & MSG_PEEK)
+ return (EOPNOTSUPP);
+
+ /* If no space to copy out anything */
+ if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
+ return (EINVAL);
+
+ sb = &so->so_rcv;
+
+ /* Remembered so we can tell later whether anything was copied out. */
+ orig_resid = uio->uio_resid;
+
+ /* Prevent other readers from entering the socket. */
+ error = sblock(sb, SBLOCKWAIT(flags));
+ if (error) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: sblock returned error = %d\n", __func__, error);
+ return (error);
+ }
+
+ SOCKBUF_LOCK(sb);
+
+ cbarg.uio = uio;
+ cbarg.sb = sb;
+ /*
+ * If the socket is closing, there might still be some data
+ * in rx br to read. However we need to make sure
+ * the channel is still open.
+ */
+ if ((sb->sb_state & SBS_CANTRCVMORE) &&
+ (so->so_state & SS_ISDISCONNECTED)) {
+ /* Other thread already closed the channel */
+ error = EPIPE;
+ goto out;
+ }
+
+ while (true) {
+ /* Drain whatever is readable right now, up to the uio's space. */
+ while (uio->uio_resid > 0 &&
+ (canread = hvsock_canread_check(pcb)) > 0) {
+ to_read = MIN(canread, uio->uio_resid);
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: to_read = %u, skip = %u\n", __func__, to_read,
+ (unsigned int)(sizeof(struct hvs_pkt_header) +
+ pcb->recv_data_off));
+
+ error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
+ sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
+ hvsock_br_callback, (void *)&cbarg);
+ /*
+ * It is possible socket is disconnected because
+ * we released lock in hvsock_br_callback. So we
+ * need to check the state to make sure it is not
+ * disconnected.
+ */
+ if (error || so->so_state & SS_ISDISCONNECTED) {
+ break;
+ }
+
+ pcb->recv_data_len -= to_read;
+ pcb->recv_data_off += to_read;
+ }
+
+ if (error)
+ break;
+
+ /* Abort if socket has reported problems. */
+ if (so->so_error) {
+ if (so->so_error == ESHUTDOWN &&
+ orig_resid > uio->uio_resid) {
+ /*
+ * Although we got a FIN, we also received
+ * some data in this round. Deliver it
+ * to user.
+ */
+ error = 0;
+ } else {
+ if (so->so_error != ESHUTDOWN)
+ error = so->so_error;
+ }
+
+ break;
+ }
+
+ /* Cannot receive more. */
+ if (sb->sb_state & SBS_CANTRCVMORE)
+ break;
+
+ /* We are done if buffer has been filled */
+ if (uio->uio_resid == 0)
+ break;
+
+ /* Without MSG_WAITALL a partial read is good enough. */
+ if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
+ break;
+
+ /* Buffer ring is empty and we shall not block */
+ if ((so->so_state & SS_NBIO) ||
+ (flags & (MSG_DONTWAIT|MSG_NBIO))) {
+ if (orig_resid == uio->uio_resid) {
+ /* We have not read anything */
+ error = EAGAIN;
+ }
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: non blocked read return, error %d.\n",
+ __func__, error);
+ break;
+ }
+
+ /*
+ * Wait and block until (more) data comes in.
+ * Note: Drops the sockbuf lock during wait.
+ */
+ error = sbwait(sb);
+
+ if (error)
+ break;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: wake up from sbwait, read available is %u\n",
+ __func__, vmbus_chan_read_available(pcb->chan));
+ }
+
+out:
+ SOCKBUF_UNLOCK(sb);
+
+ sbunlock(sb);
+
+ /* We received a FIN in this call */
+ if (so->so_error == ESHUTDOWN) {
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
+ /* Send has already closed */
+ soisdisconnecting(so);
+ } else {
+ /* Just close the receive side */
+ socantrcvmore(so);
+ }
+ }
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: returning error = %d, so_error = %d\n",
+ __func__, error, so->so_error);
+
+ return (error);
+}
+
+/*
+ * Send handler (pru_sosend) for the stream socket.
+ *
+ * Data from `uio' is pushed to the channel through hvsock_send_data(), at
+ * most HVSOCK_SEND_BUF_SZ bytes per call and never more than
+ * hvsock_canwrite_check() reports as writable.  When nothing is writable,
+ * a call that has already sent some data returns; a non-blocking call
+ * fails with EWOULDBLOCK; any other call sleeps in sbwait() and retries.
+ * `addr', `top', `controlp' and `td' are not used.
+ *
+ * Returns EINVAL for a non-stream socket, a missing pcb, an empty uio or a
+ * uio that is not a write; EPIPE when the send side is shut down or
+ * so_error is ESHUTDOWN; otherwise the error from sblock(), sbwait() or
+ * hvsock_send_data(), or 0.
+ */
+int
+hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
+    struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockbuf *sb;
+	ssize_t orig_resid;
+	uint32_t canwrite, to_write;
+	int error = 0;
+
+	/* uio_resid is an ssize_t; %zd is correct on ILP32 and LP64 alike. */
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n",
+	    __func__, uio->uio_resid);
+
+	if (so->so_type != SOCK_STREAM)
+		return (EINVAL);
+	if (pcb == NULL)
+		return (EINVAL);
+
+	/* If nothing to send */
+	if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
+		return (EINVAL);
+
+	sb = &so->so_snd;
+
+	/* Remembered so we can tell later whether anything was sent. */
+	orig_resid = uio->uio_resid;
+
+	/* Prevent other writers from entering the socket. */
+	error = sblock(sb, SBLOCKWAIT(flags));
+	if (error) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: sblock returned error = %d\n", __func__, error);
+		return (error);
+	}
+
+	SOCKBUF_LOCK(sb);
+
+	if ((sb->sb_state & SBS_CANTSENDMORE) ||
+	    so->so_error == ESHUTDOWN) {
+		error = EPIPE;
+		goto out;
+	}
+
+	while (uio->uio_resid > 0) {
+		canwrite = hvsock_canwrite_check(pcb);
+		if (canwrite == 0) {
+			/* We have sent some data */
+			if (orig_resid > uio->uio_resid)
+				break;
+			/*
+			 * We have not sent any data and it is
+			 * non-blocked io
+			 */
+			if (so->so_state & SS_NBIO ||
+			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
+				error = EWOULDBLOCK;
+				break;
+			} else {
+				/*
+				 * We are here because there is no space on
+				 * send buffer ring. Signal the other side
+				 * to read and free more space.
+				 * Sleep wait until space available to send
+				 * Note: Drops the sockbuf lock during wait.
+				 */
+				error = sbwait(sb);
+
+				if (error)
+					break;
+
+				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+				    "%s: wake up from sbwait, space avail on "
+				    "tx ring is %u\n",
+				    __func__,
+				    vmbus_chan_write_available(pcb->chan));
+
+				continue;
+			}
+		}
+		/* Clamp to what is left, what fits, and the per-send cap. */
+		to_write = MIN(canwrite, uio->uio_resid);
+		to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
+
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: canwrite is %u, to_write = %u\n", __func__,
+		    canwrite, to_write);
+		error = hvsock_send_data(pcb->chan, uio, to_write, sb);
+
+		if (error)
+			break;
+	}
+
+out:
+	SOCKBUF_UNLOCK(sb);
+	sbunlock(sb);
+
+	return (error);
+}
+
+/*
+ * Hand back a freshly allocated copy of the remote address in *nam.
+ * Returns EINVAL without a pcb, ENOMEM when the copy cannot be allocated.
+ */
+int
+hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr *copy;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	copy = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT);
+	*nam = copy;
+	if (copy == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Hand back a freshly allocated copy of the local address in *nam.
+ * Returns EINVAL without a pcb, ENOMEM when the copy cannot be allocated.
+ */
+int
+hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr *copy;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	copy = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT);
+	*nam = copy;
+	if (copy == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Close handler: send a zero-length packet (the FIN) to the peer when
+ * connected, mark the socket disconnected, detach the pcb from its channel
+ * and socket, and drop a listening socket from the bound list.
+ */
+void
+hvs_trans_close(struct socket *so)
+{
+	struct hvs_pcb *sock_pcb;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_close called\n", __func__);
+
+	(void) hvs_trans_lock();
+
+	sock_pcb = so2hvspcb(so);
+	if (sock_pcb == NULL) {
+		hvs_trans_unlock();
+		return;
+	}
+
+	if ((so->so_state & SS_ISCONNECTED) != 0) {
+		/* Send a FIN to peer */
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: hvs_trans_close sending a FIN to host\n", __func__);
+		(void) hvsock_send_data(sock_pcb->chan, NULL, 0, NULL);
+	}
+
+	if ((so->so_state &
+	    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) != 0) {
+		soisdisconnected(so);
+	}
+
+	/* The pcb no longer refers to a channel or to this socket. */
+	sock_pcb->chan = NULL;
+	sock_pcb->so = NULL;
+
+	if (SOLISTENING(so)) {
+		/* Remove from bound list */
+		mtx_lock(&hvs_trans_socks_mtx);
+		__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+		mtx_unlock(&hvs_trans_socks_mtx);
+	}
+
+	hvs_trans_unlock();
+}
+
+/*
+ * Abort handler: drop a listening socket from the bound list and
+ * disconnect a connected one.  Does nothing when the socket has no pcb.
+ */
+void
+hvs_trans_abort(struct socket *so)
+{
+ /* Unlike detach/close, the pcb is read before the lock is taken. */
+ struct hvs_pcb *pcb = so2hvspcb(so);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_abort called\n", __func__);
+
+ (void) hvs_trans_lock();
+ if (pcb == NULL) {
+ hvs_trans_unlock();
+ return;
+ }
+
+ if (SOLISTENING(so)) {
+ mtx_lock(&hvs_trans_socks_mtx);
+ /* Remove from bound list */
+ __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+ mtx_unlock(&hvs_trans_socks_mtx);
+ }
+
+ if (so->so_state & SS_ISCONNECTED) {
+ /*
+ * NOTE(review): sodisconnect() presumably dispatches to
+ * hvs_trans_disconnect(), which calls hvs_trans_lock() again
+ * while this thread still holds it -- confirm that recursing on
+ * hvs_trans_socks_sx is permitted (it is initialized with a
+ * plain sx_init()).
+ */
+ (void) sodisconnect(so);
+ }
+ hvs_trans_unlock();
+
+ return;
+}
+
+int
+hvs_trans_shutdown(struct socket *so)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list