svn commit: r253440 - head/usr.sbin/bhyve
Peter Grehan
grehan at FreeBSD.org
Wed Jul 17 23:37:34 UTC 2013
Author: grehan
Date: Wed Jul 17 23:37:33 2013
New Revision: 253440
URL: http://svnweb.freebsd.org/changeset/base/253440
Log:
Major rework of the virtio code. Split out common parts, and modify
the net/block devices accordingly.
Submitted by: Chris Torek torek at torek dot net
Reviewed by: grehan
Added:
head/usr.sbin/bhyve/virtio.c (contents, props changed)
Modified:
head/usr.sbin/bhyve/Makefile
head/usr.sbin/bhyve/pci_virtio_block.c
head/usr.sbin/bhyve/pci_virtio_net.c
head/usr.sbin/bhyve/virtio.h
Modified: head/usr.sbin/bhyve/Makefile
==============================================================================
--- head/usr.sbin/bhyve/Makefile Wed Jul 17 23:29:56 2013 (r253439)
+++ head/usr.sbin/bhyve/Makefile Wed Jul 17 23:37:33 2013 (r253440)
@@ -10,7 +10,7 @@ SRCS= acpi.c atpic.c bhyverun.c consport
SRCS+= ioapic.c mem.c mevent.c mptbl.c
SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c pmtmr.c post.c rtc.c
-SRCS+= xmsr.c spinup_ap.c
+SRCS+= virtio.c xmsr.c spinup_ap.c
.PATH: ${.CURDIR}/../../sys/amd64/vmm
SRCS+= vmm_instruction_emul.c
Modified: head/usr.sbin/bhyve/pci_virtio_block.c
==============================================================================
--- head/usr.sbin/bhyve/pci_virtio_block.c Wed Jul 17 23:29:56 2013 (r253439)
+++ head/usr.sbin/bhyve/pci_virtio_block.c Wed Jul 17 23:37:33 2013 (r253440)
@@ -53,14 +53,6 @@ __FBSDID("$FreeBSD$");
#define VTBLK_RINGSZ 64
-#define VTBLK_CFGSZ 28
-
-#define VTBLK_R_CFG VTCFG_R_CFG1
-#define VTBLK_R_CFG_END VTBLK_R_CFG + VTBLK_CFGSZ -1
-#define VTBLK_R_MAX VTBLK_R_CFG_END
-
-#define VTBLK_REGSZ VTBLK_R_MAX+1
-
#define VTBLK_MAXSEGS 32
#define VTBLK_S_OK 0
@@ -71,28 +63,10 @@ __FBSDID("$FreeBSD$");
*/
#define VTBLK_S_HOSTCAPS \
( 0x00000004 | /* host maximum request segments */ \
- 0x10000000 ) /* supports indirect descriptors */
-
-static int use_msix = 1;
-
-struct vring_hqueue {
- /* Internal state */
- uint16_t hq_size;
- uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */
-
- /* Host-context pointers to the queue */
- struct virtio_desc *hq_dtable;
- uint16_t *hq_avail_flags;
- uint16_t *hq_avail_idx; /* monotonically increasing */
- uint16_t *hq_avail_ring;
-
- uint16_t *hq_used_flags;
- uint16_t *hq_used_idx; /* monotonically increasing */
- struct virtio_used *hq_used_ring;
-};
+ VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */
/*
- * Config space
+ * Config space "registers"
*/
struct vtblk_config {
uint64_t vbc_capacity;
@@ -104,7 +78,6 @@ struct vtblk_config {
uint32_t vbc_blk_size;
uint32_t vbc_sectors_max;
} __packed;
-CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
/*
* Fixed-size block header
@@ -129,113 +102,69 @@ static int pci_vtblk_debug;
* Per-device softc
*/
struct pci_vtblk_softc {
- struct pci_devinst *vbsc_pi;
+ struct virtio_softc vbsc_vs;
+ struct vqueue_info vbsc_vq;
int vbsc_fd;
- int vbsc_status;
- int vbsc_isr;
- int vbsc_lastq;
- uint32_t vbsc_features;
- uint64_t vbsc_pfn;
- struct vring_hqueue vbsc_q;
struct vtblk_config vbsc_cfg;
- uint16_t msix_table_idx_req;
- uint16_t msix_table_idx_cfg;
};
-#define vtblk_ctx(sc) ((sc)->vbsc_pi->pi_vmctx)
-
-/*
- * Return the size of IO BAR that maps virtio header and device specific
- * region. The size would vary depending on whether MSI-X is enabled or
- * not
- */
-static uint64_t
-pci_vtblk_iosize(struct pci_devinst *pi)
-{
-
- if (pci_msix_enabled(pi))
- return (VTBLK_REGSZ);
- else
- return (VTBLK_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX));
-}
-/*
- * Return the number of available descriptors in the vring taking care
- * of the 16-bit index wraparound.
- */
-static int
-hq_num_avail(struct vring_hqueue *hq)
-{
- uint16_t ndesc;
-
- /*
- * We're just computing (a-b) in GF(2^16).
- *
- * The only glitch here is that in standard C,
- * uint16_t promotes to (signed) int when int has
- * more than 16 bits (pretty much always now), so
- * we have to force it back to unsigned.
- */
- ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
-
- assert(ndesc <= hq->hq_size);
-
- return (ndesc);
-}
+static void pci_vtblk_reset(void *);
+static void pci_vtblk_notify(void *, struct vqueue_info *);
+static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
+static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtblk_vi_consts = {
+ "vtblk", /* our name */
+ 1, /* we support 1 virtqueue */
+ sizeof(struct vtblk_config), /* config reg size */
+ pci_vtblk_reset, /* reset */
+ pci_vtblk_notify, /* device-wide qnotify */
+ pci_vtblk_cfgread, /* read PCI config */
+ pci_vtblk_cfgwrite, /* write PCI config */
+ VTBLK_S_HOSTCAPS, /* our capabilities */
+};
static void
-pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
+pci_vtblk_reset(void *vsc)
{
- if (value == 0) {
- DPRINTF(("vtblk: device reset requested !\n"));
- sc->vbsc_isr = 0;
- sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;
- sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;
- sc->vbsc_features = 0;
- sc->vbsc_pfn = 0;
- sc->vbsc_lastq = 0;
- memset(&sc->vbsc_q, 0, sizeof(struct vring_hqueue));
- }
+ struct pci_vtblk_softc *sc = vsc;
- sc->vbsc_status = value;
+ DPRINTF(("vtblk: device reset requested !\n"));
+ vi_reset_dev(&sc->vbsc_vs);
}
static void
-pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
+pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
{
- struct iovec iov[VTBLK_MAXSEGS];
struct virtio_blk_hdr *vbh;
- struct virtio_desc *vd, *vid;
- struct virtio_used *vu;
uint8_t *status;
- int i;
+ int i, n;
int err;
int iolen;
- int uidx, aidx, didx;
- int indirect, writeop, type;
+ int writeop, type;
off_t offset;
+ struct iovec iov[VTBLK_MAXSEGS + 2];
+ uint16_t flags[VTBLK_MAXSEGS + 2];
- uidx = *hq->hq_used_idx;
- aidx = hq->hq_cur_aidx;
- didx = hq->hq_avail_ring[aidx % hq->hq_size];
- assert(didx >= 0 && didx < hq->hq_size);
-
- vd = &hq->hq_dtable[didx];
-
- indirect = ((vd->vd_flags & VRING_DESC_F_INDIRECT) != 0);
-
- if (indirect) {
- vid = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, vd->vd_len);
- vd = &vid[0];
- }
+ n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags);
/*
- * The first descriptor will be the read-only fixed header
+ * The first descriptor will be the read-only fixed header,
+ * and the last is for status (hence +2 above and below).
+ * The remaining iov's are the actual data I/O vectors.
+ *
+ * XXX - note - this fails on crash dump, which does a
+ * VIRTIO_BLK_T_FLUSH with a zero transfer length
*/
- vbh = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr,
- sizeof(struct virtio_blk_hdr));
- assert(vd->vd_len == sizeof(struct virtio_blk_hdr));
- assert(vd->vd_flags & VRING_DESC_F_NEXT);
- assert((vd->vd_flags & VRING_DESC_F_WRITE) == 0);
+ assert (n >= 3 && n < VTBLK_MAXSEGS + 2);
+
+ assert((flags[0] & VRING_DESC_F_WRITE) == 0);
+ assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
+ vbh = iov[0].iov_base;
+
+ status = iov[--n].iov_base;
+ assert(iov[n].iov_len == 1);
+ assert(flags[n] & VRING_DESC_F_WRITE);
/*
* XXX
@@ -247,120 +176,44 @@ pci_vtblk_proc(struct pci_vtblk_softc *s
offset = vbh->vbh_sector * DEV_BSIZE;
- /*
- * Build up the iovec based on the guest's data descriptors
- */
- i = iolen = 0;
- while (1) {
- if (indirect)
- vd = &vid[i + 1]; /* skip first indirect desc */
- else
- vd = &hq->hq_dtable[vd->vd_next];
-
- if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
- break;
-
- if (i == VTBLK_MAXSEGS)
- break;
-
+ iolen = 0;
+ for (i = 1; i < n; i++) {
/*
* - write op implies read-only descriptor,
* - read op implies write-only descriptor,
* therefore test the inverse of the descriptor bit
* to the op.
*/
- assert(((vd->vd_flags & VRING_DESC_F_WRITE) == 0) ==
- writeop);
-
- iov[i].iov_base = paddr_guest2host(vtblk_ctx(sc),
- vd->vd_addr,
- vd->vd_len);
- iov[i].iov_len = vd->vd_len;
- iolen += vd->vd_len;
- i++;
+ assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
+ iolen += iov[i].iov_len;
}
- /* Lastly, get the address of the status byte */
- status = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, 1);
- assert(vd->vd_len == 1);
- assert((vd->vd_flags & VRING_DESC_F_NEXT) == 0);
- assert(vd->vd_flags & VRING_DESC_F_WRITE);
-
DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
- writeop ? "write" : "read", iolen, i, offset));
+ writeop ? "write" : "read", iolen, i - 1, offset));
if (writeop)
- err = pwritev(sc->vbsc_fd, iov, i, offset);
+ err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset);
else
- err = preadv(sc->vbsc_fd, iov, i, offset);
+ err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset);
*status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;
/*
- * Return the single descriptor back to the host
+ * Return the descriptor back to the host.
+ * We wrote 1 byte (our status) to host.
*/
- vu = &hq->hq_used_ring[uidx % hq->hq_size];
- vu->vu_idx = didx;
- vu->vu_tlen = 1;
- hq->hq_cur_aidx++;
- *hq->hq_used_idx += 1;
-
- /*
- * Generate an interrupt if able
- */
- if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
- if (use_msix) {
- pci_generate_msix(sc->vbsc_pi, sc->msix_table_idx_req);
- } else if (sc->vbsc_isr == 0) {
- sc->vbsc_isr = 1;
- pci_generate_msi(sc->vbsc_pi, 0);
- }
- }
+ vq_relchain(vq, 1);
}
static void
-pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
+pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
{
- struct vring_hqueue *hq = &sc->vbsc_q;
- int ndescs;
+ struct pci_vtblk_softc *sc = vsc;
- while ((ndescs = hq_num_avail(hq)) != 0) {
- /*
- * Run through all the entries, placing them into iovecs and
- * sending when an end-of-packet is found
- */
- pci_vtblk_proc(sc, hq);
- }
-}
-
-static void
-pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
-{
- struct vring_hqueue *hq;
-
- sc->vbsc_pfn = pfn << VRING_PFN;
-
- /*
- * Set up host pointers to the various parts of the
- * queue
- */
- hq = &sc->vbsc_q;
- hq->hq_size = VTBLK_RINGSZ;
-
- hq->hq_dtable = paddr_guest2host(vtblk_ctx(sc), pfn << VRING_PFN,
- vring_size(VTBLK_RINGSZ));
- hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
- hq->hq_avail_idx = hq->hq_avail_flags + 1;
- hq->hq_avail_ring = hq->hq_avail_flags + 2;
- hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
- VRING_ALIGN);
- hq->hq_used_idx = hq->hq_used_flags + 1;
- hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
-
- /*
- * Initialize queue indexes
- */
- hq->hq_cur_aidx = 0;
+ vq_startchains(vq);
+ while (vq_has_descs(vq))
+ pci_vtblk_proc(sc, vq);
+ vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
}
static int
@@ -371,6 +224,7 @@ pci_vtblk_init(struct vmctx *ctx, struct
off_t size;
int fd;
int sectsz;
+ int use_msix;
const char *env_msi;
if (opts == NULL) {
@@ -412,10 +266,14 @@ pci_vtblk_init(struct vmctx *ctx, struct
sc = malloc(sizeof(struct pci_vtblk_softc));
memset(sc, 0, sizeof(struct pci_vtblk_softc));
- pi->pi_arg = sc;
- sc->vbsc_pi = pi;
+ /* record fd of storage device/file */
sc->vbsc_fd = fd;
+ /* init virtio softc and virtqueues */
+ vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
+ sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
+ /* sc->vbsc_vq.vq_notify = we have no per-queue notify */
+
/* setup virtio block config space */
sc->vbsc_cfg.vbc_capacity = size / sectsz;
sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
@@ -426,206 +284,51 @@ pci_vtblk_init(struct vmctx *ctx, struct
sc->vbsc_cfg.vbc_geom_s = 0;
sc->vbsc_cfg.vbc_sectors_max = 0;
- /* initialize config space */
+ /*
+ * Should we move some of this into virtio.c? Could
+ * have the device, class, and subdev_0 as fields in
+ * the virtio constants structure.
+ */
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
+ use_msix = 1;
if ((env_msi = getenv("BHYVE_USE_MSI"))) {
if (strcasecmp(env_msi, "yes") == 0)
use_msix = 0;
}
-
- if (use_msix) {
- /* MSI-X Support */
- sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;
- sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;
-
- if (pci_emul_add_msixcap(pi, 2, 1))
- return (1);
- } else {
- /* MSI Support */
- pci_emul_add_msicap(pi, 1);
- }
-
- pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ);
-
+ if (vi_intr_init(&sc->vbsc_vs, 1, use_msix))
+ return (1);
+ vi_set_io_bar(&sc->vbsc_vs, 0);
return (0);
}
-static uint64_t
-vtblk_adjust_offset(struct pci_devinst *pi, uint64_t offset)
-{
- /*
- * Device specific offsets used by guest would change
- * based on whether MSI-X capability is enabled or not
- */
- if (!pci_msix_enabled(pi)) {
- if (offset >= VTCFG_R_MSIX)
- return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX));
- }
-
- return (offset);
-}
-
-static void
-pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int baridx, uint64_t offset, int size, uint64_t value)
+static int
+pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
- struct pci_vtblk_softc *sc = pi->pi_arg;
-
- if (use_msix) {
- if (baridx == pci_msix_table_bar(pi) ||
- baridx == pci_msix_pba_bar(pi)) {
- pci_emul_msix_twrite(pi, offset, size, value);
- return;
- }
- }
-
- assert(baridx == 0);
- if (offset + size > pci_vtblk_iosize(pi)) {
- DPRINTF(("vtblk_write: 2big, offset %ld size %d\n",
- offset, size));
- return;
- }
-
- offset = vtblk_adjust_offset(pi, offset);
-
- switch (offset) {
- case VTCFG_R_GUESTCAP:
- assert(size == 4);
- sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
- break;
- case VTCFG_R_PFN:
- assert(size == 4);
- pci_vtblk_ring_init(sc, value);
- break;
- case VTCFG_R_QSEL:
- assert(size == 2);
- sc->vbsc_lastq = value;
- break;
- case VTCFG_R_QNOTIFY:
- assert(size == 2);
- assert(value == 0);
- pci_vtblk_qnotify(sc);
- break;
- case VTCFG_R_STATUS:
- assert(size == 1);
- pci_vtblk_update_status(sc, value);
- break;
- case VTCFG_R_CFGVEC:
- assert(size == 2);
- sc->msix_table_idx_cfg = value;
- break;
- case VTCFG_R_QVEC:
- assert(size == 2);
- sc->msix_table_idx_req = value;
- break;
- case VTCFG_R_HOSTCAP:
- case VTCFG_R_QNUM:
- case VTCFG_R_ISR:
- case VTBLK_R_CFG ... VTBLK_R_CFG_END:
- DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset));
- break;
- default:
- DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset));
- value = 0;
- break;
- }
+ DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
+ return (1);
}
-uint64_t
-pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int baridx, uint64_t offset, int size)
+static int
+pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
- struct pci_vtblk_softc *sc = pi->pi_arg;
+ struct pci_vtblk_softc *sc = vsc;
void *ptr;
- uint32_t value;
- if (use_msix) {
- if (baridx == pci_msix_table_bar(pi) ||
- baridx == pci_msix_pba_bar(pi)) {
- return (pci_emul_msix_tread(pi, offset, size));
- }
- }
-
- assert(baridx == 0);
-
- if (offset + size > pci_vtblk_iosize(pi)) {
- DPRINTF(("vtblk_read: 2big, offset %ld size %d\n",
- offset, size));
- return (0);
- }
-
- offset = vtblk_adjust_offset(pi, offset);
-
- switch (offset) {
- case VTCFG_R_HOSTCAP:
- assert(size == 4);
- value = VTBLK_S_HOSTCAPS;
- break;
- case VTCFG_R_GUESTCAP:
- assert(size == 4);
- value = sc->vbsc_features; /* XXX never read ? */
- break;
- case VTCFG_R_PFN:
- assert(size == 4);
- value = sc->vbsc_pfn >> VRING_PFN;
- break;
- case VTCFG_R_QNUM:
- value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
- break;
- case VTCFG_R_QSEL:
- assert(size == 2);
- value = sc->vbsc_lastq; /* XXX never read ? */
- break;
- case VTCFG_R_QNOTIFY:
- assert(size == 2);
- value = 0; /* XXX never read ? */
- break;
- case VTCFG_R_STATUS:
- assert(size == 1);
- value = sc->vbsc_status;
- break;
- case VTCFG_R_ISR:
- assert(size == 1);
- value = sc->vbsc_isr;
- sc->vbsc_isr = 0; /* a read clears this flag */
- break;
- case VTCFG_R_CFGVEC:
- assert(size == 2);
- value = sc->msix_table_idx_cfg;
- break;
- case VTCFG_R_QVEC:
- assert(size == 2);
- value = sc->msix_table_idx_req;
- break;
- case VTBLK_R_CFG ... VTBLK_R_CFG_END:
- assert(size + offset <= (VTBLK_R_CFG_END + 1));
- ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG;
- if (size == 1) {
- value = *(uint8_t *) ptr;
- } else if (size == 2) {
- value = *(uint16_t *) ptr;
- } else {
- value = *(uint32_t *) ptr;
- }
- break;
- default:
- DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset));
- value = 0;
- break;
- }
-
- return (value);
+ /* our caller has already verified offset and size */
+ ptr = (uint8_t *)&sc->vbsc_cfg + offset;
+ memcpy(retval, ptr, size);
+ return (0);
}
struct pci_devemu pci_de_vblk = {
.pe_emu = "virtio-blk",
.pe_init = pci_vtblk_init,
- .pe_barwrite = pci_vtblk_write,
- .pe_barread = pci_vtblk_read
+ .pe_barwrite = vi_pci_write,
+ .pe_barread = vi_pci_read
};
PCI_EMUL_SET(pci_de_vblk);
Modified: head/usr.sbin/bhyve/pci_virtio_net.c
==============================================================================
--- head/usr.sbin/bhyve/pci_virtio_net.c Wed Jul 17 23:29:56 2013 (r253439)
+++ head/usr.sbin/bhyve/pci_virtio_net.c Wed Jul 17 23:37:33 2013 (r253440)
@@ -59,56 +59,49 @@ __FBSDID("$FreeBSD$");
#define VTNET_MAXSEGS 32
/*
- * PCI config-space register offsets
+ * Host capabilities. Note that we only offer a few of these.
*/
-#define VTNET_R_CFG0 24
-#define VTNET_R_CFG1 25
-#define VTNET_R_CFG2 26
-#define VTNET_R_CFG3 27
-#define VTNET_R_CFG4 28
-#define VTNET_R_CFG5 29
-#define VTNET_R_CFG6 30
-#define VTNET_R_CFG7 31
-#define VTNET_R_MAX 31
+#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */
+#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */
+#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */
+#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */
+#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */
+#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */
+#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */
+#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */
+#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */
+#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */
+#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */
+#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */
+#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */
+#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */
+#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */
+#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */
+#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE \
+ (1 << 21) /* guest can send gratuitous pkts */
-#define VTNET_REGSZ VTNET_R_MAX+1
+#define VTNET_S_HOSTCAPS \
+ ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
+ VIRTIO_F_NOTIFY_ON_EMPTY)
/*
- * Host capabilities
+ * PCI config-space "registers"
*/
-#define VTNET_S_HOSTCAPS \
- ( 0x00000020 | /* host supplies MAC */ \
- 0x00008000 | /* host can merge Rx buffers */ \
- 0x00010000 | /* config status available */ \
- VIRTIO_F_NOTIFY_ON_EMPTY)
+struct virtio_net_config {
+ uint8_t mac[6];
+ uint16_t status;
+} __packed;
/*
* Queue definitions.
*/
#define VTNET_RXQ 0
#define VTNET_TXQ 1
-#define VTNET_CTLQ 2
+#define VTNET_CTLQ 2 /* NB: not yet supported */
#define VTNET_MAXQ 3
-static int use_msix = 1;
-
-struct vring_hqueue {
- /* Internal state */
- uint16_t hq_size;
- uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */
-
- /* Host-context pointers to the queue */
- struct virtio_desc *hq_dtable;
- uint16_t *hq_avail_flags;
- uint16_t *hq_avail_idx; /* monotonically increasing */
- uint16_t *hq_avail_ring;
-
- uint16_t *hq_used_flags;
- uint16_t *hq_used_idx; /* monotonically increasing */
- struct virtio_used *hq_used_ring;
-};
-
/*
* Fixed network header size
*/
@@ -133,23 +126,17 @@ static int pci_vtnet_debug;
* Per-device softc
*/
struct pci_vtnet_softc {
- struct pci_devinst *vsc_pi;
+ struct virtio_softc vsc_vs;
+ struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
pthread_mutex_t vsc_mtx;
struct mevent *vsc_mevp;
- int vsc_curq;
- int vsc_status;
- int vsc_isr;
int vsc_tapfd;
int vsc_rx_ready;
- int resetting;
+ volatile int resetting; /* set and checked outside lock */
uint32_t vsc_features;
- uint8_t vsc_macaddr[6];
-
- uint64_t vsc_pfn[VTNET_MAXQ];
- struct vring_hqueue vsc_hq[VTNET_MAXQ];
- uint16_t vsc_msix_table_idx[VTNET_MAXQ];
+ struct virtio_net_config vsc_config;
pthread_mutex_t rx_mtx;
int rx_in_progress;
@@ -159,73 +146,22 @@ struct pci_vtnet_softc {
pthread_cond_t tx_cond;
int tx_in_progress;
};
-#define vtnet_ctx(sc) ((sc)->vsc_pi->pi_vmctx)
-#define notify_on_empty(sc) ((sc)->vsc_features & VIRTIO_F_NOTIFY_ON_EMPTY)
-
-/*
- * Return the size of IO BAR that maps virtio header and device specific
- * region. The size would vary depending on whether MSI-X is enabled or
- * not.
- */
-static uint64_t
-pci_vtnet_iosize(struct pci_devinst *pi)
-{
- if (pci_msix_enabled(pi))
- return (VTNET_REGSZ);
- else
- return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX));
-}
-
-/*
- * Return the number of available descriptors in the vring taking care
- * of the 16-bit index wraparound.
- */
-static int
-hq_num_avail(struct vring_hqueue *hq)
-{
- uint16_t ndesc;
- /*
- * We're just computing (a-b) mod 2^16
- *
- * The only glitch here is that in standard C,
- * uint16_t promotes to (signed) int when int has
- * more than 16 bits (pretty much always now), so
- * we have to force it back to unsigned.
- */
- ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
-
- assert(ndesc <= hq->hq_size);
-
- return (ndesc);
-}
-
-static uint16_t
-pci_vtnet_qsize(int qnum)
-{
- /* XXX no ctl queue currently */
- if (qnum == VTNET_CTLQ) {
- return (0);
- }
-
- /* XXX fixed currently. Maybe different for tx/rx/ctl */
- return (VTNET_RINGSZ);
-}
-
-static void
-pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring)
-{
- struct vring_hqueue *hq;
-
- assert(ring < VTNET_MAXQ);
-
- hq = &sc->vsc_hq[ring];
-
- /*
- * Reset all soft state
- */
- hq->hq_cur_aidx = 0;
-}
+static void pci_vtnet_reset(void *);
+/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
+static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
+static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtnet_vi_consts = {
+ "vtnet", /* our name */
+ VTNET_MAXQ - 1, /* we currently support 2 virtqueues */
+ sizeof(struct virtio_net_config), /* config reg size */
+ pci_vtnet_reset, /* reset */
+ NULL, /* device-wide qnotify -- not used */
+ pci_vtnet_cfgread, /* read PCI config */
+ pci_vtnet_cfgwrite, /* write PCI config */
+ VTNET_S_HOSTCAPS, /* our capabilities */
+};
/*
* If the transmit thread is active then stall until it is done.
@@ -260,48 +196,27 @@ pci_vtnet_rxwait(struct pci_vtnet_softc
}
static void
-pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value)
+pci_vtnet_reset(void *vsc)
{
- int i;
-
- if (value == 0) {
- DPRINTF(("vtnet: device reset requested !\n"));
-
- sc->resetting = 1;
-
- /*
- * Wait for the transmit and receive threads to finish their
- * processing.
- */
- pci_vtnet_txwait(sc);
- pci_vtnet_rxwait(sc);
+ struct pci_vtnet_softc *sc = vsc;
- sc->vsc_rx_ready = 0;
- pci_vtnet_ring_reset(sc, VTNET_RXQ);
- pci_vtnet_ring_reset(sc, VTNET_TXQ);
+ DPRINTF(("vtnet: device reset requested !\n"));
- for (i = 0; i < VTNET_MAXQ; i++)
- sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR;
+ sc->resetting = 1;
- sc->vsc_isr = 0;
- sc->vsc_features = 0;
+ /*
+ * Wait for the transmit and receive threads to finish their
+ * processing.
+ */
+ pci_vtnet_txwait(sc);
+ pci_vtnet_rxwait(sc);
- sc->resetting = 0;
- }
+ sc->vsc_rx_ready = 0;
- sc->vsc_status = value;
-}
+ /* now reset rings, MSI-X vectors, and negotiated capabilities */
+ vi_reset_dev(&sc->vsc_vs);
-static void
-vtnet_generate_interrupt(struct pci_vtnet_softc *sc, int qidx)
-{
-
- if (use_msix) {
- pci_generate_msix(sc->vsc_pi, sc->vsc_msix_table_idx[qidx]);
- } else {
- sc->vsc_isr |= 1;
- pci_generate_msi(sc->vsc_pi, 0);
- }
+ sc->resetting = 0;
}
/*
@@ -311,7 +226,7 @@ static void
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
int len)
{
- char pad[60];
+ static char pad[60]; /* all zero bytes */
if (sc->vsc_tapfd == -1)
return;
@@ -322,7 +237,6 @@ pci_vtnet_tap_tx(struct pci_vtnet_softc
* there is always an extra iov available by the caller.
*/
if (len < 60) {
- memset(pad, 0, 60 - len);
iov[iovcnt].iov_base = pad;
iov[iovcnt].iov_len = 60 - len;
iovcnt++;
@@ -342,15 +256,11 @@ static uint8_t dummybuf[2048];
static void
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
{
- struct virtio_desc *vd;
- struct virtio_used *vu;
- struct vring_hqueue *hq;
+ struct vqueue_info *vq;
struct virtio_net_rxhdr *vrx;
uint8_t *buf;
- int i;
int len;
- int ndescs;
- int didx, uidx, aidx; /* descriptor, avail and used index */
+ struct iovec iov;
/*
* Should never be called without a valid tap fd
@@ -370,47 +280,45 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc
}
/*
- * Calculate the number of available rx buffers
+ * Check for available rx buffers
*/
- hq = &sc->vsc_hq[VTNET_RXQ];
-
- ndescs = hq_num_avail(hq);
-
- if (ndescs == 0) {
+ vq = &sc->vsc_queues[VTNET_RXQ];
+ vq_startchains(vq);
+ if (!vq_has_descs(vq)) {
/*
- * Drop the packet and try later
+ * Drop the packet and try later. Interrupt on
+ * empty, if that's negotiated.
*/
(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
-
- if (notify_on_empty(sc))
- vtnet_generate_interrupt(sc, VTNET_RXQ);
-
+ vq_endchains(vq, 1);
return;
}
- aidx = hq->hq_cur_aidx;
- uidx = *hq->hq_used_idx;
- for (i = 0; i < ndescs; i++) {
+ do {
/*
- * 'aidx' indexes into the an array of descriptor indexes
+ * Get descriptor chain, which should have just
+ * one descriptor in it.
+ * ??? allow guests to use multiple descs?
*/
- didx = hq->hq_avail_ring[aidx % hq->hq_size];
- assert(didx >= 0 && didx < hq->hq_size);
-
- vd = &hq->hq_dtable[didx];
+ assert(vq_getchain(vq, &iov, 1, NULL) == 1);
/*
* Get a pointer to the rx header, and use the
* data immediately following it for the packet buffer.
*/
- vrx = paddr_guest2host(vtnet_ctx(sc), vd->vd_addr, vd->vd_len);
+ vrx = iov.iov_base;
buf = (uint8_t *)(vrx + 1);
len = read(sc->vsc_tapfd, buf,
- vd->vd_len - sizeof(struct virtio_net_rxhdr));
+ iov.iov_len - sizeof(struct virtio_net_rxhdr));
if (len < 0 && errno == EWOULDBLOCK) {
- break;
+ /*
+ * No more packets, but still some avail ring
+ * entries. Interrupt if needed/appropriate.
+ */
+ vq_endchains(vq, 0);
+ return;
}
/*
@@ -422,23 +330,13 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc
vrx->vrh_bufs = 1;
/*
- * Write this descriptor into the used ring
+ * Release this chain and handle more chains.
*/
- vu = &hq->hq_used_ring[uidx % hq->hq_size];
- vu->vu_idx = didx;
- vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr);
- uidx++;
- aidx++;
- }
+ vq_relchain(vq, len + sizeof(struct virtio_net_rxhdr));
+ } while (vq_has_descs(vq));
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list