svn commit: r358185 - stable/12/usr.sbin/bhyve
Vincenzo Maffione
vmaffione at FreeBSD.org
Thu Feb 20 21:52:38 UTC 2020
Author: vmaffione
Date: Thu Feb 20 21:52:36 2020
New Revision: 358185
URL: https://svnweb.freebsd.org/changeset/base/358185
Log:
MFC r357846
bhyve: move virtio-net header processing to pci_virtio_net
This patch cleans up the API between the net frontends (e1000,
virtio-net) and the net backends (tap and netmap).
We move the virtio-net header stripping/prepending to the
virtio-net code, where this functionality belongs.
In this way, the netbe_send() and netbe_recv() signatures
can take a const struct iovec * rather than a struct iovec *.
Reviewed by: grehan, bcr, aleksandr.fedorov at itglobal.com
Differential Revision: https://reviews.freebsd.org/D23342
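As an illustration (not part of the patch), the new division of labour can be
sketched as follows: the frontend owns the iovec array and skips the
virtio-net header itself, so the backend send/recv hooks only need read
access to the chain. The helper name trim_hdr() and the buffer sizes below
are made up for the example; the real code uses iov_trim_hdr() in
pci_virtio_net.c.

    #include <sys/uio.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Frontend-side helper: advance past a header of 'hlen' bytes. */
    static struct iovec *
    trim_hdr(struct iovec *iov, int *iovcnt, size_t hlen)
    {
            if (iov[0].iov_len < hlen)
                    return (NULL);          /* header must fit in the first segment */
            iov[0].iov_len -= hlen;
            if (iov[0].iov_len == 0) {
                    if (--(*iovcnt) == 0)
                            return (NULL);  /* the chain held only the header */
                    return (&iov[1]);
            }
            iov[0].iov_base = (uint8_t *)iov[0].iov_base + hlen;
            return (&iov[0]);
    }

    /* Backend-style consumer: a read-only view of the chain is enough. */
    static size_t
    count_bytes(const struct iovec *iov, int iovcnt)
    {
            size_t len = 0;
            int i;

            for (i = 0; i < iovcnt; i++)
                    len += iov[i].iov_len;
            return (len);
    }

    int
    main(void)
    {
            uint8_t buf[12 + 64] = { 0 };   /* 12-byte virtio-net header + payload */
            struct iovec iov[1] = { { .iov_base = buf, .iov_len = sizeof(buf) } };
            int iovcnt = 1;
            struct iovec *payload;

            payload = trim_hdr(iov, &iovcnt, 12);
            if (payload != NULL)
                    printf("payload bytes: %zu\n", count_bytes(payload, iovcnt));
            return (0);
    }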
Modified:
stable/12/usr.sbin/bhyve/net_backends.c
stable/12/usr.sbin/bhyve/net_backends.h
stable/12/usr.sbin/bhyve/net_utils.c
stable/12/usr.sbin/bhyve/pci_e82545.c
stable/12/usr.sbin/bhyve/pci_virtio_net.c
Modified: stable/12/usr.sbin/bhyve/net_backends.c
==============================================================================
--- stable/12/usr.sbin/bhyve/net_backends.c Thu Feb 20 21:48:36 2020 (r358184)
+++ stable/12/usr.sbin/bhyve/net_backends.c Thu Feb 20 21:52:36 2020 (r358185)
@@ -99,7 +99,8 @@ struct net_backend {
* vector provided by the caller has 'iovcnt' elements and contains
* the packet to send.
*/
- ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt);
+ ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
+ int iovcnt);
/*
* Called to receive a packet from the backend. When the function
@@ -108,7 +109,8 @@ struct net_backend {
* The function returns 0 if the backend doesn't have a new packet to
* receive.
*/
- ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
+ ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
+ int iovcnt);
/*
* Ask the backend to enable or disable receive operation in the
@@ -238,13 +240,13 @@ error:
* Called to send a buffer chain out to the tap device
*/
static ssize_t
-tap_send(struct net_backend *be, struct iovec *iov, int iovcnt)
+tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
return (writev(be->fd, iov, iovcnt));
}
static ssize_t
-tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
+tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
ssize_t ret;
@@ -458,7 +460,7 @@ netmap_cleanup(struct net_backend *be)
}
static ssize_t
-netmap_send(struct net_backend *be, struct iovec *iov,
+netmap_send(struct net_backend *be, const struct iovec *iov,
int iovcnt)
{
struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
@@ -538,7 +540,7 @@ txsync:
}
static ssize_t
-netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
+netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
struct netmap_slot *slot = NULL;
@@ -749,42 +751,10 @@ netbe_set_cap(struct net_backend *be, uint64_t feature
return (ret);
}
-static __inline struct iovec *
-iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
-{
- struct iovec *riov;
-
- /* XXX short-cut: assume first segment is >= tlen */
- assert(iov[0].iov_len >= tlen);
-
- iov[0].iov_len -= tlen;
- if (iov[0].iov_len == 0) {
- assert(*iovcnt > 1);
- *iovcnt -= 1;
- riov = &iov[1];
- } else {
- iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
- riov = &iov[0];
- }
-
- return (riov);
-}
-
ssize_t
-netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt)
+netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
- assert(be != NULL);
- if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
- /*
- * The frontend uses a virtio-net header, but the backend
- * does not. We ignore it (as it must be all zeroes) and
- * strip it.
- */
- assert(be->be_vnet_hdr_len == 0);
- iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
- }
-
return (be->send(be, iov, iovcnt));
}
@@ -794,46 +764,10 @@ netbe_send(struct net_backend *be, struct iovec *iov,
* the length of the packet just read. Return -1 in case of errors.
*/
ssize_t
-netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
+netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
- /* Length of prepended virtio-net header. */
- unsigned int hlen = be->fe_vnet_hdr_len;
- int ret;
- assert(be != NULL);
-
- if (hlen && hlen != be->be_vnet_hdr_len) {
- /*
- * The frontend uses a virtio-net header, but the backend
- * does not. We need to prepend a zeroed header.
- */
- struct virtio_net_rxhdr *vh;
-
- assert(be->be_vnet_hdr_len == 0);
-
- /*
- * Get a pointer to the rx header, and use the
- * data immediately following it for the packet buffer.
- */
- vh = iov[0].iov_base;
- iov = iov_trim(iov, &iovcnt, hlen);
-
- /*
- * The only valid field in the rx packet header is the
- * number of buffers if merged rx bufs were negotiated.
- */
- memset(vh, 0, hlen);
- if (hlen == VNET_HDR_LEN) {
- vh->vrh_bufs = 1;
- }
- }
-
- ret = be->recv(be, iov, iovcnt);
- if (ret > 0) {
- ret += hlen;
- }
-
- return (ret);
+ return (be->recv(be, iov, iovcnt));
}
/*
@@ -870,4 +804,11 @@ netbe_rx_enable(struct net_backend *be)
{
return be->recv_enable(be);
+}
+
+size_t
+netbe_get_vnet_hdr_len(struct net_backend *be)
+{
+
+ return (be->be_vnet_hdr_len);
}
Modified: stable/12/usr.sbin/bhyve/net_backends.h
==============================================================================
--- stable/12/usr.sbin/bhyve/net_backends.h Thu Feb 20 21:48:36 2020 (r358184)
+++ stable/12/usr.sbin/bhyve/net_backends.h Thu Feb 20 21:52:36 2020 (r358185)
@@ -43,8 +43,9 @@ void netbe_cleanup(net_backend_t *be);
uint64_t netbe_get_cap(net_backend_t *be);
int netbe_set_cap(net_backend_t *be, uint64_t cap,
unsigned vnet_hdr_len);
-ssize_t netbe_send(net_backend_t *be, struct iovec *iov, int iovcnt);
-ssize_t netbe_recv(net_backend_t *be, struct iovec *iov, int iovcnt);
+size_t netbe_get_vnet_hdr_len(net_backend_t *be);
+ssize_t netbe_send(net_backend_t *be, const struct iovec *iov, int iovcnt);
+ssize_t netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt);
ssize_t netbe_rx_discard(net_backend_t *be);
void netbe_rx_disable(net_backend_t *be);
void netbe_rx_enable(net_backend_t *be);
Modified: stable/12/usr.sbin/bhyve/net_utils.c
==============================================================================
--- stable/12/usr.sbin/bhyve/net_utils.c Thu Feb 20 21:48:36 2020 (r358184)
+++ stable/12/usr.sbin/bhyve/net_utils.c Thu Feb 20 21:52:36 2020 (r358185)
@@ -43,21 +43,19 @@ int
net_parsemac(char *mac_str, uint8_t *mac_addr)
{
struct ether_addr *ea;
- char *tmpstr;
char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
- tmpstr = strsep(&mac_str,"=");
+ if (mac_str == NULL)
+ return (EINVAL);
- if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
- ea = ether_aton(mac_str);
+ ea = ether_aton(mac_str);
- if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
- memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
- EPRINTLN("Invalid MAC %s", mac_str);
- return (EINVAL);
- } else
- memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
- }
+ if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
+ memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
+ EPRINTLN("Invalid MAC %s", mac_str);
+ return (EINVAL);
+ } else
+ memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
return (0);
}
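For reference, an illustrative snippet (not part of the patch): net_parsemac()
no longer strips a leading "mac=" token, so the frontends' option parsers are
responsible for splitting key=value pairs and passing only the value. The
wrapper below is hypothetical; the net_parsemac() prototype is assumed to come
from net_utils.h when built as part of bhyve.

    #include <errno.h>
    #include <stdint.h>
    #include <string.h>

    /* Prototype from net_utils.h (assumed available when linked into bhyve). */
    int net_parsemac(char *mac_str, uint8_t *mac_addr);

    /*
     * Hypothetical caller: the "mac=<addr>" split now happens in the
     * frontend option parser; only the value reaches net_parsemac().
     */
    static int
    parse_mac_option(char *opt, uint8_t *mac)
    {
            char *value = opt;
            char *key = strsep(&value, "=");

            if (value == NULL || strcmp(key, "mac") != 0)
                    return (EINVAL);
            return (net_parsemac(value, mac));
    }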
Modified: stable/12/usr.sbin/bhyve/pci_e82545.c
==============================================================================
--- stable/12/usr.sbin/bhyve/pci_e82545.c Thu Feb 20 21:48:36 2020 (r358184)
+++ stable/12/usr.sbin/bhyve/pci_e82545.c Thu Feb 20 21:52:36 2020 (r358185)
@@ -2328,18 +2328,36 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi,
mac_provided = 0;
sc->esc_be = NULL;
if (opts != NULL) {
- int err;
+ int err = 0;
devname = vtopts = strdup(opts);
(void) strsep(&vtopts, ",");
- if (vtopts != NULL) {
- err = net_parsemac(vtopts, sc->esc_mac.octet);
- if (err != 0) {
- free(devname);
- return (err);
+ /*
+ * Parse the list of options in the form
+ * key1=value1,...,keyN=valueN.
+ */
+ while (vtopts != NULL) {
+ char *value = vtopts;
+ char *key;
+
+ key = strsep(&value, "=");
+ if (value == NULL)
+ break;
+ vtopts = value;
+ (void) strsep(&vtopts, ",");
+
+ if (strcmp(key, "mac") == 0) {
+ err = net_parsemac(value, sc->esc_mac.octet);
+ if (err)
+ break;
+ mac_provided = 1;
}
- mac_provided = 1;
+ }
+
+ if (err) {
+ free(devname);
+ return (err);
}
err = netbe_init(&sc->esc_be, devname, e82545_rx_callback, sc);
Modified: stable/12/usr.sbin/bhyve/pci_virtio_net.c
==============================================================================
--- stable/12/usr.sbin/bhyve/pci_virtio_net.c Thu Feb 20 21:48:36 2020 (r358184)
+++ stable/12/usr.sbin/bhyve/pci_virtio_net.c Thu Feb 20 21:52:36 2020 (r358185)
@@ -117,6 +117,9 @@ struct pci_vtnet_softc {
pthread_cond_t tx_cond;
int tx_in_progress;
+ size_t vhdrlen;
+ size_t be_vhdrlen;
+
struct virtio_net_config vsc_config;
struct virtio_consts vsc_consts;
};
@@ -180,6 +183,38 @@ pci_vtnet_reset(void *vsc)
pthread_mutex_unlock(&sc->rx_mtx);
}
+static __inline struct iovec *
+iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
+{
+ struct iovec *riov;
+
+ if (iov[0].iov_len < hlen) {
+ /*
+ * Not enough header space in the first fragment.
+ * That's not ok for us.
+ */
+ return NULL;
+ }
+
+ iov[0].iov_len -= hlen;
+ if (iov[0].iov_len == 0) {
+ *iovcnt -= 1;
+ if (*iovcnt == 0) {
+ /*
+ * Only space for the header. That's not
+ * enough for us.
+ */
+ return NULL;
+ }
+ riov = &iov[1];
+ } else {
+ iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen);
+ riov = &iov[0];
+ }
+
+ return (riov);
+}
+
struct virtio_mrg_rxbuf_info {
uint16_t idx;
uint16_t pad;
@@ -189,31 +224,34 @@ struct virtio_mrg_rxbuf_info {
static void
pci_vtnet_rx(struct pci_vtnet_softc *sc)
{
+ int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen;
struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
struct iovec iov[VTNET_MAXSEGS + 1];
struct vqueue_info *vq;
- uint32_t cur_iov_bytes;
- struct iovec *cur_iov;
- uint16_t cur_iov_len;
+ uint32_t riov_bytes;
+ struct iovec *riov;
+ int riov_len;
uint32_t ulen;
int n_chains;
int len;
vq = &sc->vsc_queues[VTNET_RXQ];
for (;;) {
+ struct virtio_net_rxhdr *hdr;
+
/*
* Get a descriptor chain to store the next ingress
* packet. In case of mergeable rx buffers, get as
* many chains as necessary in order to make room
* for a maximum sized LRO packet.
*/
- cur_iov_bytes = 0;
- cur_iov_len = 0;
- cur_iov = iov;
+ riov_bytes = 0;
+ riov_len = 0;
+ riov = iov;
n_chains = 0;
do {
- int n = vq_getchain(vq, &info[n_chains].idx, cur_iov,
- VTNET_MAXSEGS - cur_iov_len, NULL);
+ int n = vq_getchain(vq, &info[n_chains].idx, riov,
+ VTNET_MAXSEGS - riov_len, NULL);
if (n == 0) {
/*
@@ -239,21 +277,43 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
vq_kick_disable(vq);
continue;
}
- assert(n >= 1 && cur_iov_len + n <= VTNET_MAXSEGS);
- cur_iov_len += n;
+ assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS);
+ riov_len += n;
if (!sc->rx_merge) {
n_chains = 1;
break;
}
- info[n_chains].len = (uint32_t)count_iov(cur_iov, n);
- cur_iov_bytes += info[n_chains].len;
- cur_iov += n;
+ info[n_chains].len = (uint32_t)count_iov(riov, n);
+ riov_bytes += info[n_chains].len;
+ riov += n;
n_chains++;
- } while (cur_iov_bytes < VTNET_MAX_PKT_LEN &&
- cur_iov_len < VTNET_MAXSEGS);
+ } while (riov_bytes < VTNET_MAX_PKT_LEN &&
+ riov_len < VTNET_MAXSEGS);
- len = netbe_recv(sc->vsc_be, iov, cur_iov_len);
+ riov = iov;
+ hdr = riov[0].iov_base;
+ if (prepend_hdr_len > 0) {
+ /*
+ * The frontend uses a virtio-net header, but the
+ * backend does not. We need to prepend a zeroed
+ * header.
+ */
+ riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len);
+ if (riov == NULL) {
+ /*
+ * The first collected chain is nonsensical,
+ * as it is not even enough to store the
+ * virtio-net header. Just drop it.
+ */
+ vq_relchain(vq, info[0].idx, 0);
+ vq_retchains(vq, n_chains - 1);
+ continue;
+ }
+ memset(hdr, 0, prepend_hdr_len);
+ }
+ len = netbe_recv(sc->vsc_be, riov, riov_len);
+
if (len <= 0) {
/*
* No more packets (len == 0), or backend errored
@@ -266,18 +326,18 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
return;
}
- ulen = (uint32_t)len; /* avoid too many casts below */
+ ulen = (uint32_t)(len + prepend_hdr_len);
- /* Publish the used buffers to the guest. */
+ /*
+ * Publish the used buffers to the guest, reporting the
+ * number of bytes that we wrote.
+ */
if (!sc->rx_merge) {
vq_relchain(vq, info[0].idx, ulen);
} else {
- struct virtio_net_rxhdr *hdr = iov[0].iov_base;
uint32_t iolen;
int i = 0;
- assert(iov[0].iov_len >= sizeof(*hdr));
-
do {
iolen = info[i].len;
if (iolen > ulen) {
@@ -333,6 +393,7 @@ static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
{
struct iovec iov[VTNET_MAXSEGS + 1];
+ struct iovec *siov = iov;
uint16_t idx;
ssize_t len;
int n;
@@ -344,10 +405,34 @@ pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vq
n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
assert(n >= 1 && n <= VTNET_MAXSEGS);
- len = netbe_send(sc->vsc_be, iov, n);
+ if (sc->vhdrlen != sc->be_vhdrlen) {
+ /*
+ * The frontend uses a virtio-net header, but the backend
+ * does not. We simply strip the header and ignore it, as
+ * it should be zero-filled.
+ */
+ siov = iov_trim_hdr(siov, &n, sc->vhdrlen);
+ }
- /* chain is processed, release it and set len */
- vq_relchain(vq, idx, len > 0 ? len : 0);
+ if (siov == NULL) {
+ /* The chain is nonsensical. Just drop it. */
+ len = 0;
+ } else {
+ len = netbe_send(sc->vsc_be, siov, n);
+ if (len < 0) {
+ /*
+ * If send failed, report that 0 bytes
+ * were read.
+ */
+ len = 0;
+ }
+ }
+
+ /*
+ * Return the processed chain to the guest, reporting
+ * the number of bytes that we read.
+ */
+ vq_relchain(vq, idx, len);
}
/* Called on TX kick. */
@@ -466,21 +551,40 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *
if (opts != NULL) {
char *devname;
char *vtopts;
- int err;
+ int err = 0;
+ /* Get the device name. */
devname = vtopts = strdup(opts);
(void) strsep(&vtopts, ",");
- if (vtopts != NULL) {
- err = net_parsemac(vtopts, sc->vsc_config.mac);
- if (err != 0) {
- free(devname);
- free(sc);
- return (err);
+ /*
+ * Parse the list of options in the form
+ * key1=value1,...,keyN=valueN.
+ */
+ while (vtopts != NULL) {
+ char *value = vtopts;
+ char *key;
+
+ key = strsep(&value, "=");
+ if (value == NULL)
+ break;
+ vtopts = value;
+ (void) strsep(&vtopts, ",");
+
+ if (strcmp(key, "mac") == 0) {
+ err = net_parsemac(value, sc->vsc_config.mac);
+ if (err)
+ break;
+ mac_provided = 1;
}
- mac_provided = 1;
}
+ if (err) {
+ free(devname);
+ free(sc);
+ return (err);
+ }
+
err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback,
sc);
free(devname);
@@ -520,6 +624,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *
sc->resetting = 0;
sc->rx_merge = 0;
+ sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
pthread_mutex_init(&sc->rx_mtx, NULL);
/*
@@ -574,24 +679,25 @@ static void
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
{
struct pci_vtnet_softc *sc = vsc;
- unsigned int rx_vhdrlen;
sc->vsc_features = negotiated_features;
if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) {
- rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
+ sc->vhdrlen = sizeof(struct virtio_net_rxhdr);
sc->rx_merge = 1;
} else {
/*
* Without mergeable rx buffers, virtio-net header is 2
* bytes shorter than sizeof(struct virtio_net_rxhdr).
*/
- rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
+ sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
sc->rx_merge = 0;
}
/* Tell the backend to enable some capabilities it has advertised. */
- netbe_set_cap(sc->vsc_be, negotiated_features, rx_vhdrlen);
+ netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen);
+ sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be);
+ assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen);
}
static struct pci_devemu pci_de_vnet = {