svn commit: r295124 - in stable/10: lib/libvmmapi share/examples/bhyve sys/amd64/include sys/amd64/vmm sys/amd64/vmm/amd sys/amd64/vmm/intel sys/amd64/vmm/io sys/sys usr.sbin/bhyve usr.sbin/bhyvect...
Yamagi Burmeister
lists at yamagi.org
Mon Feb 1 18:16:44 UTC 2016
And huge thanks to you for committing this.
On Mon, 1 Feb 2016 14:56:11 +0000 (UTC)
Peter Grehan <grehan at FreeBSD.org> wrote:
> Author: grehan
> Date: Mon Feb 1 14:56:11 2016
> New Revision: 295124
> URL: https://svnweb.freebsd.org/changeset/base/295124
>
> Log:
> MFC r284539, r284630, r284688, r284877, r285217, r285218,
> r286837, r286838, r288470, r288522, r288524, r288826,
> r289001
>
> Pull in bhyve bug fixes and changes to allow UEFI booting.
> This provides Windows support.
>
> Tested on Intel and AMD with:
> - Arch Linux i386+amd64 (kernel 4.3.3)
> - Ubuntu 15.10 server 64-bit
> - FreeBSD-CURRENT/amd64 20160127 snap
> - FreeBSD 10.2 i386+amd64
> - OpenBSD 5.8 i386+amd64
> - SmartOS latest
> - Windows 10 build 1511
>
> Huge thanks to Yamagi Burmeister who submitted the patch
> and did the majority of the testing.
>
> r284539 - bootrom mem allocation support
> r284630 - Add SO_REUSEADDR when starting debug port
> r284688 - Fix a regression in "movs" emulation
> r284877 - verify_gla() non-zero segment base fix
> r285217 - Always assert DCD and DSR in the uart
> r285218 - devmem nodes moved to /dev/vmm.io/
> r286837 - Add define for SATA Check-Power-Mode
> r286838 - Add simple (no-op) SATA cmd emulations
> r288470 - Increase virtio-blk indirect descs
> r288522 - Firmware guest query interface
> r288524 - Fix post-test typo
> r288826 - Clean up SATA unimplemented cmd msg
> r289001 - Add -l option to specify userboot path
>
> Submitted by: Yamagi Burmeister
> Approved by: re (kib)
>
> Added:
> stable/10/usr.sbin/bhyve/bootrom.c
> - copied unchanged from r284539, head/usr.sbin/bhyve/bootrom.c
> stable/10/usr.sbin/bhyve/bootrom.h
> - copied unchanged from r284539, head/usr.sbin/bhyve/bootrom.h
> stable/10/usr.sbin/bhyve/fwctl.c
> - copied, changed from r288522, head/usr.sbin/bhyve/fwctl.c
> stable/10/usr.sbin/bhyve/fwctl.h
> - copied unchanged from r288522, head/usr.sbin/bhyve/fwctl.h
> Modified:
> stable/10/lib/libvmmapi/vmmapi.c
> stable/10/lib/libvmmapi/vmmapi.h
> stable/10/share/examples/bhyve/vmrun.sh
> stable/10/sys/amd64/include/vmm.h
> stable/10/sys/amd64/include/vmm_dev.h
> stable/10/sys/amd64/vmm/amd/svm.c
> stable/10/sys/amd64/vmm/intel/vmx.c
> stable/10/sys/amd64/vmm/io/ppt.c
> stable/10/sys/amd64/vmm/vmm.c
> stable/10/sys/amd64/vmm/vmm_dev.c
> stable/10/sys/amd64/vmm/vmm_instruction_emul.c
> stable/10/sys/amd64/vmm/vmm_mem.c
> stable/10/sys/amd64/vmm/vmm_mem.h
> stable/10/sys/sys/ata.h
> stable/10/usr.sbin/bhyve/Makefile
> stable/10/usr.sbin/bhyve/bhyve.8
> stable/10/usr.sbin/bhyve/bhyverun.c
> stable/10/usr.sbin/bhyve/dbgport.c
> stable/10/usr.sbin/bhyve/pci_ahci.c
> stable/10/usr.sbin/bhyve/pci_lpc.c
> stable/10/usr.sbin/bhyve/pci_lpc.h
> stable/10/usr.sbin/bhyve/pci_passthru.c
> stable/10/usr.sbin/bhyve/pci_virtio_net.c
> stable/10/usr.sbin/bhyve/uart_emul.c
> stable/10/usr.sbin/bhyvectl/bhyvectl.c
> stable/10/usr.sbin/bhyveload/bhyveload.8
> stable/10/usr.sbin/bhyveload/bhyveload.c
> Directory Properties:
> stable/10/ (props changed)
>
> Modified: stable/10/lib/libvmmapi/vmmapi.c
> ==============================================================================
> --- stable/10/lib/libvmmapi/vmmapi.c Mon Feb 1 14:28:58 2016 (r295123)
> +++ stable/10/lib/libvmmapi/vmmapi.c Mon Feb 1 14:56:11 2016 (r295124)
> @@ -58,15 +58,23 @@ __FBSDID("$FreeBSD$");
> #define MB (1024 * 1024UL)
> #define GB (1024 * 1024 * 1024UL)
>
> +/*
> + * Size of the guard region before and after the virtual address space
> + * mapping the guest physical memory. This must be a multiple of the
> + * superpage size for performance reasons.
> + */
> +#define VM_MMAP_GUARD_SIZE (4 * MB)
> +
> +#define PROT_RW (PROT_READ | PROT_WRITE)
> +#define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC)
> +
> struct vmctx {
> int fd;
> uint32_t lowmem_limit;
> - enum vm_mmap_style vms;
> int memflags;
> size_t lowmem;
> - char *lowmem_addr;
> size_t highmem;
> - char *highmem_addr;
> + char *baseaddr;
> char *name;
> };
>
> @@ -157,22 +165,6 @@ vm_parse_memsize(const char *optarg, siz
> return (error);
> }
>
> -int
> -vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
> - int *wired)
> -{
> - int error;
> - struct vm_memory_segment seg;
> -
> - bzero(&seg, sizeof(seg));
> - seg.gpa = gpa;
> - error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
> - *ret_len = seg.len;
> - if (wired != NULL)
> - *wired = seg.wired;
> - return (error);
> -}
> -
> uint32_t
> vm_get_lowmem_limit(struct vmctx *ctx)
> {
> @@ -194,39 +186,184 @@ vm_set_memflags(struct vmctx *ctx, int f
> ctx->memflags = flags;
> }
>
> +int
> +vm_get_memflags(struct vmctx *ctx)
> +{
> +
> + return (ctx->memflags);
> +}
> +
> +/*
> + * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
> + */
> +int
> +vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
> + size_t len, int prot)
> +{
> + struct vm_memmap memmap;
> + int error, flags;
> +
> + memmap.gpa = gpa;
> + memmap.segid = segid;
> + memmap.segoff = off;
> + memmap.len = len;
> + memmap.prot = prot;
> + memmap.flags = 0;
> +
> + if (ctx->memflags & VM_MEM_F_WIRED)
> + memmap.flags |= VM_MEMMAP_F_WIRED;
> +
> + /*
> + * If this mapping already exists then don't create it again. This
> + * is the common case for SYSMEM mappings created by bhyveload(8).
> + */
> + error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
> + if (error == 0 && gpa == memmap.gpa) {
> + if (segid != memmap.segid || off != memmap.segoff ||
> + prot != memmap.prot || flags != memmap.flags) {
> + errno = EEXIST;
> + return (-1);
> + } else {
> + return (0);
> + }
> + }
> +
> + error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
> + return (error);
> +}
> +
> +int
> +vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
> + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
> +{
> + struct vm_memmap memmap;
> + int error;
> +
> + bzero(&memmap, sizeof(struct vm_memmap));
> + memmap.gpa = *gpa;
> + error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
> + if (error == 0) {
> + *gpa = memmap.gpa;
> + *segid = memmap.segid;
> + *segoff = memmap.segoff;
> + *len = memmap.len;
> + *prot = memmap.prot;
> + *flags = memmap.flags;
> + }
> + return (error);
> +}
> +
> +/*
> + * Return 0 if the segments are identical and non-zero otherwise.
> + *
> + * This is slightly complicated by the fact that only device memory segments
> + * are named.
> + */
> static int
> -setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr)
> +cmpseg(size_t len, const char *str, size_t len2, const char *str2)
> {
> - int error, mmap_flags;
> - struct vm_memory_segment seg;
> +
> + if (len == len2) {
> + if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
> + return (0);
> + }
> + return (-1);
> +}
> +
> +static int
> +vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
> +{
> + struct vm_memseg memseg;
> + size_t n;
> + int error;
>
> /*
> - * Create and optionally map 'len' bytes of memory at guest
> - * physical address 'gpa'
> + * If the memory segment has already been created then just return.
> + * This is the usual case for the SYSMEM segment created by userspace
> + * loaders like bhyveload(8).
> */
> - bzero(&seg, sizeof(seg));
> - seg.gpa = gpa;
> - seg.len = len;
> - error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
> - if (error == 0 && addr != NULL) {
> - mmap_flags = MAP_SHARED;
> - if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
> - mmap_flags |= MAP_NOCORE;
> - *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags,
> - ctx->fd, gpa);
> + error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
> + sizeof(memseg.name));
> + if (error)
> + return (error);
> +
> + if (memseg.len != 0) {
> + if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
> + errno = EINVAL;
> + return (-1);
> + } else {
> + return (0);
> + }
> + }
> +
> + bzero(&memseg, sizeof(struct vm_memseg));
> + memseg.segid = segid;
> + memseg.len = len;
> + if (name != NULL) {
> + n = strlcpy(memseg.name, name, sizeof(memseg.name));
> + if (n >= sizeof(memseg.name)) {
> + errno = ENAMETOOLONG;
> + return (-1);
> + }
> + }
> +
> + error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
> + return (error);
> +}
> +
> +int
> +vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
> + size_t bufsize)
> +{
> + struct vm_memseg memseg;
> + size_t n;
> + int error;
> +
> + memseg.segid = segid;
> + error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
> + if (error == 0) {
> + *lenp = memseg.len;
> + n = strlcpy(namebuf, memseg.name, bufsize);
> + if (n >= bufsize) {
> + errno = ENAMETOOLONG;
> + error = -1;
> + }
> }
> return (error);
> }
>
> +static int
> +setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
> +{
> + char *ptr;
> + int error, flags;
> +
> + /* Map 'len' bytes starting at 'gpa' in the guest address space */
> + error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
> + if (error)
> + return (error);
> +
> + flags = MAP_SHARED | MAP_FIXED;
> + if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
> + flags |= MAP_NOCORE;
> +
> + /* mmap into the process address space on the host */
> + ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
> + if (ptr == MAP_FAILED)
> + return (-1);
> +
> + return (0);
> +}
> +
> int
> vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
> {
> - char **addr;
> - int error;
> + size_t objsize, len;
> + vm_paddr_t gpa;
> + char *baseaddr, *ptr;
> + int error, flags;
>
> - /* XXX VM_MMAP_SPARSE not implemented yet */
> - assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL);
> - ctx->vms = vms;
> + assert(vms == VM_MMAP_ALL);
>
> /*
> * If 'memsize' cannot fit entirely in the 'lowmem' segment then
> @@ -234,43 +371,69 @@ vm_setup_memory(struct vmctx *ctx, size_
> */
> if (memsize > ctx->lowmem_limit) {
> ctx->lowmem = ctx->lowmem_limit;
> - ctx->highmem = memsize - ctx->lowmem;
> + ctx->highmem = memsize - ctx->lowmem_limit;
> + objsize = 4*GB + ctx->highmem;
> } else {
> ctx->lowmem = memsize;
> ctx->highmem = 0;
> + objsize = ctx->lowmem;
> }
>
> - if (ctx->lowmem > 0) {
> - addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL;
> - error = setup_memory_segment(ctx, 0, ctx->lowmem, addr);
> + error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
> + if (error)
> + return (error);
> +
> + /*
> + * Stake out a contiguous region covering the guest physical memory
> + * and the adjoining guard regions.
> + */
> + len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
> + flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
> + ptr = mmap(NULL, len, PROT_NONE, flags, -1, 0);
> + if (ptr == MAP_FAILED)
> + return (-1);
> +
> + baseaddr = ptr + VM_MMAP_GUARD_SIZE;
> + if (ctx->highmem > 0) {
> + gpa = 4*GB;
> + len = ctx->highmem;
> + error = setup_memory_segment(ctx, gpa, len, baseaddr);
> if (error)
> return (error);
> }
>
> - if (ctx->highmem > 0) {
> - addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL;
> - error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr);
> + if (ctx->lowmem > 0) {
> + gpa = 0;
> + len = ctx->lowmem;
> + error = setup_memory_segment(ctx, gpa, len, baseaddr);
> if (error)
> return (error);
> }
>
> + ctx->baseaddr = baseaddr;
> +
> return (0);
> }
>
> +/*
> + * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
> + * the lowmem or highmem regions.
> + *
> + * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region.
> + * The instruction emulation code depends on this behavior.
> + */
> void *
> vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
> {
>
> - /* XXX VM_MMAP_SPARSE not implemented yet */
> - assert(ctx->vms == VM_MMAP_ALL);
> -
> - if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem)
> - return ((void *)(ctx->lowmem_addr + gaddr));
> + if (ctx->lowmem > 0) {
> + if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem)
> + return (ctx->baseaddr + gaddr);
> + }
>
> - if (gaddr >= 4*GB) {
> - gaddr -= 4*GB;
> - if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem)
> - return ((void *)(ctx->highmem_addr + gaddr));
> + if (ctx->highmem > 0) {
> + if (gaddr >= 4*GB && gaddr + len <= 4*GB + ctx->highmem)
> + return (ctx->baseaddr + gaddr);
> }
>
> return (NULL);
> @@ -290,6 +453,56 @@ vm_get_highmem_size(struct vmctx *ctx)
> return (ctx->highmem);
> }
>
> +void *
> +vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
> +{
> + char pathname[MAXPATHLEN];
> + size_t len2;
> + char *base, *ptr;
> + int fd, error, flags;
> +
> + fd = -1;
> + ptr = MAP_FAILED;
> + if (name == NULL || strlen(name) == 0) {
> + errno = EINVAL;
> + goto done;
> + }
> +
> + error = vm_alloc_memseg(ctx, segid, len, name);
> + if (error)
> + goto done;
> +
> + strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
> + strlcat(pathname, ctx->name, sizeof(pathname));
> + strlcat(pathname, ".", sizeof(pathname));
> + strlcat(pathname, name, sizeof(pathname));
> +
> + fd = open(pathname, O_RDWR);
> + if (fd < 0)
> + goto done;
> +
> + /*
> + * Stake out a contiguous region covering the device memory and the
> + * adjoining guard regions.
> + */
> + len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
> + flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
> + base = mmap(NULL, len2, PROT_NONE, flags, -1, 0);
> + if (base == MAP_FAILED)
> + goto done;
> +
> + flags = MAP_SHARED | MAP_FIXED;
> + if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
> + flags |= MAP_NOCORE;
> +
> + /* mmap the devmem region in the host address space */
> + ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
> +done:
> + if (fd >= 0)
> + close(fd);
> + return (ptr);
> +}
> +
> int
> vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
> uint64_t base, uint32_t limit, uint32_t access)
>
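For context, a minimal consumer-side sketch of the reworked libvmmapi memory
API above. The VM name "testvm" and the 1 GB size are illustrative
assumptions; the flow (vm_open(), then vm_setup_memory() with VM_MMAP_ALL,
then vm_map_gpa() for a host pointer) follows the new code, which no longer
supports VM_MMAP_NONE:

/*
 * Sketch only: assumes the VM "testvm" was already created (e.g. by
 * vm_create() or bhyveload(8)). Build with -lvmmapi on FreeBSD.
 */
#include <sys/types.h>
#include <err.h>
#include <stdio.h>
#include <machine/vmm.h>
#include <vmmapi.h>

int
main(void)
{
	struct vmctx *ctx;
	size_t memsize = 1024 * 1024 * 1024;	/* illustrative 1 GB */
	void *hva;

	ctx = vm_open("testvm");
	if (ctx == NULL)
		err(1, "vm_open");

	/* Allocates the sysmem segment and maps it contiguously on the host */
	if (vm_setup_memory(ctx, memsize, VM_MMAP_ALL) != 0)
		err(1, "vm_setup_memory");

	/* Guest-physical to host-virtual; returns NULL for MMIO/unmapped */
	hva = vm_map_gpa(ctx, 0x1000, 4096);
	if (hva == NULL)
		errx(1, "gpa 0x1000 is not in a sysmem region");
	printf("gpa 0x1000 -> host %p\n", hva);
	return (0);
}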
> Modified: stable/10/lib/libvmmapi/vmmapi.h
> ==============================================================================
> --- stable/10/lib/libvmmapi/vmmapi.h Mon Feb 1 14:28:58 2016 (r295123)
> +++ stable/10/lib/libvmmapi/vmmapi.h Mon Feb 1 14:56:11 2016 (r295124)
> @@ -36,7 +36,7 @@
> * API version for out-of-tree consumers like grub-bhyve for making compile
> * time decisions.
> */
> -#define VMMAPI_VERSION 0101 /* 2 digit major followed by 2 digit minor */
> +#define VMMAPI_VERSION 0102 /* 2 digit major followed by 2 digit minor */
>
> struct iovec;
> struct vmctx;
> @@ -52,14 +52,59 @@ enum vm_mmap_style {
> VM_MMAP_SPARSE, /* mappings created on-demand */
> };
>
> +/*
> + * 'flags' value passed to 'vm_set_memflags()'.
> + */
> #define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */
> +#define VM_MEM_F_WIRED 0x02 /* guest memory is wired */
> +
> +/*
> + * Identifiers for memory segments:
> + * - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
> + * - the remaining identifiers can be used to create devmem segments.
> + */
> +enum {
> + VM_SYSMEM,
> + VM_BOOTROM,
> + VM_FRAMEBUFFER,
> +};
> +
> +/*
> + * Get the length and name of the memory segment identified by 'segid'.
> + * Note that system memory segments are identified with a nul name.
> + *
> + * Returns 0 on success and non-zero otherwise.
> + */
> +int vm_get_memseg(struct vmctx *ctx, int ident, size_t *lenp, char *name,
> + size_t namesiz);
> +
> +/*
> + * Iterate over the guest address space. This function finds an address range
> + * that starts at an address >= *gpa.
> + *
> + * Returns 0 if the next address range was found and non-zero otherwise.
> + */
> +int vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
> + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
> +/*
> + * Create a device memory segment identified by 'segid'.
> + *
> + * Returns a pointer to the memory segment on success and MAP_FAILED otherwise.
> + */
> +void *vm_create_devmem(struct vmctx *ctx, int segid, const char *name,
> + size_t len);
> +
> +/*
> + * Map the memory segment identified by 'segid' into the guest address space
> + * at [gpa,gpa+len) with protection 'prot'.
> + */
> +int vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
> + vm_ooffset_t segoff, size_t len, int prot);
>
> int vm_create(const char *name);
> struct vmctx *vm_open(const char *name);
> void vm_destroy(struct vmctx *ctx);
> int vm_parse_memsize(const char *optarg, size_t *memsize);
> -int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
> - int *wired);
> int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
> void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
> int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
> @@ -68,6 +113,7 @@ int vm_gla2gpa(struct vmctx *, int vcpui
> uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
> void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
> void vm_set_memflags(struct vmctx *ctx, int flags);
> +int vm_get_memflags(struct vmctx *ctx);
> size_t vm_get_lowmem_size(struct vmctx *ctx);
> size_t vm_get_highmem_size(struct vmctx *ctx);
> int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
>
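The devmem half of the new API is easiest to see end-to-end. Below is a
sketch modeled on the bootrom support merged in r284539; the 16 MB ROM size,
the map_bootrom() helper name, and the placement flush against the 4 GB
boundary are assumptions for illustration, not taken verbatim from the
commit:

#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <string.h>
#include <machine/vmm.h>
#include <vmmapi.h>

#define	ROM_SIZE	(16 * 1024 * 1024)	/* assumed ROM size */

static void
map_bootrom(struct vmctx *ctx, const void *image, size_t imagelen)
{
	char *romptr;
	vm_paddr_t gpa;

	/* Creates segment VM_BOOTROM and its /dev/vmm.io/<vm>.bootrom node */
	romptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", ROM_SIZE);
	if (romptr == MAP_FAILED)
		err(1, "vm_create_devmem");

	/* Place the image at the top of the segment, reset-vector style */
	memcpy(romptr + ROM_SIZE - imagelen, image, imagelen);

	/* Map read/execute so the segment ends at the 4 GB boundary */
	gpa = (1ULL << 32) - ROM_SIZE;
	if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, ROM_SIZE,
	    PROT_READ | PROT_EXEC) != 0)
		err(1, "vm_mmap_memseg");
}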
> Modified: stable/10/share/examples/bhyve/vmrun.sh
> ==============================================================================
> --- stable/10/share/examples/bhyve/vmrun.sh Mon Feb 1 14:28:58 2016 (r295123)
> +++ stable/10/share/examples/bhyve/vmrun.sh Mon Feb 1 14:56:11 2016 (r295124)
> @@ -48,8 +48,8 @@ usage() {
>
> echo "Usage: vmrun.sh [-ahi] [-c <CPUs>] [-C <console>] [-d <disk file>]"
> echo " [-e <name=value>] [-g <gdbport> ] [-H <directory>]"
> - echo " [-I <location of installation iso>] [-m <memsize>]"
> - echo " [-t <tapdev>] <vmname>"
> + echo " [-I <location of installation iso>] [-l <loader>]"
> + echo " [-m <memsize>] [-t <tapdev>] <vmname>"
> echo ""
> echo " -h: display this help message"
> echo " -a: force memory mapped local APIC access"
> @@ -61,6 +61,7 @@ usage() {
> echo " -H: host filesystem to export to the loader"
> echo " -i: force boot of the Installation CDROM image"
> echo " -I: Installation CDROM image location (default is ${DEFAULT_ISOFILE})"
> + echo " -l: the OS loader to use (default is /boot/userboot.so)"
> echo " -m: memory size (default is ${DEFAULT_MEMSIZE})"
> echo " -p: pass-through a host PCI device at bus/slot/func (e.g. 10/0/0)"
> echo " -t: tap device for virtio-net (default is $DEFAULT_TAPDEV)"
> @@ -87,15 +88,15 @@ console=${DEFAULT_CONSOLE}
> cpus=${DEFAULT_CPUS}
> tap_total=0
> disk_total=0
> -apic_opt=""
> gdbport=0
> loader_opt=""
> +bhyverun_opt="-H -A -P"
> pass_total=0
>
> -while getopts ac:C:d:e:g:hH:iI:m:p:t: c ; do
> +while getopts ac:C:d:e:g:hH:iI:l:m:p:t: c ; do
> case $c in
> a)
> - apic_opt="-a"
> + bhyverun_opt="${bhyverun_opt} -a"
> ;;
> c)
> cpus=${OPTARG}
> @@ -125,6 +126,9 @@ while getopts ac:C:d:e:g:hH:iI:m:p:t: c
> I)
> isofile=${OPTARG}
> ;;
> + l)
> + loader_opt="${loader_opt} -l ${OPTARG}"
> + ;;
> m)
> memsize=${OPTARG}
> ;;
> @@ -163,6 +167,12 @@ if [ -n "${host_base}" ]; then
> loader_opt="${loader_opt} -h ${host_base}"
> fi
>
> +# If PCI passthru devices are configured then guest memory must be wired
> +if [ ${pass_total} -gt 0 ]; then
> + loader_opt="${loader_opt} -S"
> + bhyverun_opt="${bhyverun_opt} -S"
> +fi
> +
> make_and_check_diskdev()
> {
> local virtio_diskdev="$1"
> @@ -263,7 +273,7 @@ while [ 1 ]; do
> i=$(($i + 1))
> done
>
> - ${FBSDRUN} -c ${cpus} -m ${memsize} ${apic_opt} -A -H -P \
> + ${FBSDRUN} -c ${cpus} -m ${memsize} ${bhyverun_opt} \
> -g ${gdbport} \
> -s 0:0,hostbridge \
> -s 1:0,lpc \
>
> Modified: stable/10/sys/amd64/include/vmm.h
> ==============================================================================
> --- stable/10/sys/amd64/include/vmm.h Mon Feb 1 14:28:58 2016 (r295123)
> +++ stable/10/sys/amd64/include/vmm.h Mon Feb 1 14:56:11 2016 (r295124)
> @@ -108,7 +108,6 @@ enum x2apic_state {
>
> struct vm;
> struct vm_exception;
> -struct vm_memory_segment;
> struct seg_desc;
> struct vm_exit;
> struct vm_run;
> @@ -175,17 +174,33 @@ int vm_create(const char *name, struct v
> void vm_destroy(struct vm *vm);
> int vm_reinit(struct vm *vm);
> const char *vm_name(struct vm *vm);
> -int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
> +
> +/*
> + * APIs that modify the guest memory map require all vcpus to be frozen.
> + */
> +int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
> + size_t len, int prot, int flags);
> +int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
> +void vm_free_memseg(struct vm *vm, int ident);
> int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
> int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
> -void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
> - void **cookie);
> +int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
> +int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
> +
> +/*
> + * APIs that inspect the guest memory map require only a *single* vcpu to
> + * be frozen. This acts like a read lock on the guest memory map since any
> + * modification requires *all* vcpus to be frozen.
> + */
> +int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
> + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
> +int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
> + struct vm_object **objptr);
> +void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
> + int prot, void **cookie);
> void vm_gpa_release(void *cookie);
> -int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
> - struct vm_memory_segment *seg);
> -int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
> - vm_offset_t *offset, struct vm_object **object);
> -boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
> +bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
> +
> int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
> int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
> int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
> @@ -302,8 +317,6 @@ vcpu_should_yield(struct vm *vm, int vcp
> void *vcpu_stats(struct vm *vm, int vcpu);
> void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
> struct vmspace *vm_get_vmspace(struct vm *vm);
> -int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
> -int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
> struct vatpic *vm_atpic(struct vm *vm);
> struct vatpit *vm_atpit(struct vm *vm);
> struct vpmtmr *vm_pmtmr(struct vm *vm);
>
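The locking comments above carry the key invariant: modifying the guest
memory map needs all vcpus frozen, so a single running vcpu acts as a read
lock and inspection APIs only need that one vcpu. A hypothetical kernel-side
helper shows the intended vm_gpa_hold()/vm_gpa_release() pairing; the
function and its name are assumptions built only from the declarations above:

#include <sys/param.h>
#include <sys/errno.h>
#include <vm/vm.h>
#include <machine/vmm.h>

static int
read_guest_u64(struct vm *vm, int vcpuid, vm_paddr_t gpa, uint64_t *val)
{
	void *cookie, *hva;

	/*
	 * Returns NULL if 'gpa' is not backed by any memory map entry.
	 * Assumes 'gpa' is naturally aligned so the access stays within
	 * a single page.
	 */
	hva = vm_gpa_hold(vm, vcpuid, gpa, sizeof(*val), VM_PROT_READ,
	    &cookie);
	if (hva == NULL)
		return (EFAULT);
	*val = *(uint64_t *)hva;
	vm_gpa_release(cookie);		/* drop the page hold */
	return (0);
}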
> Modified: stable/10/sys/amd64/include/vmm_dev.h
> ==============================================================================
> --- stable/10/sys/amd64/include/vmm_dev.h Mon Feb 1 14:28:58 2016 (r295123)
> +++ stable/10/sys/amd64/include/vmm_dev.h Mon Feb 1 14:56:11 2016 (r295124)
> @@ -34,10 +34,22 @@ void vmmdev_init(void);
> int vmmdev_cleanup(void);
> #endif
>
> -struct vm_memory_segment {
> - vm_paddr_t gpa; /* in */
> +struct vm_memmap {
> + vm_paddr_t gpa;
> + int segid; /* memory segment */
> + vm_ooffset_t segoff; /* offset into memory segment */
> + size_t len; /* mmap length */
> + int prot; /* RWX */
> + int flags;
> +};
> +#define VM_MEMMAP_F_WIRED 0x01
> +#define VM_MEMMAP_F_IOMMU 0x02
> +
> +#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
> +struct vm_memseg {
> + int segid;
> size_t len;
> - int wired;
> + char name[SPECNAMELEN + 1];
> };
>
> struct vm_register {
> @@ -214,10 +226,14 @@ enum {
> IOCNUM_REINIT = 5,
>
> /* memory apis */
> - IOCNUM_MAP_MEMORY = 10,
> - IOCNUM_GET_MEMORY_SEG = 11,
> + IOCNUM_MAP_MEMORY = 10, /* deprecated */
> + IOCNUM_GET_MEMORY_SEG = 11, /* deprecated */
> IOCNUM_GET_GPA_PMAP = 12,
> IOCNUM_GLA2GPA = 13,
> + IOCNUM_ALLOC_MEMSEG = 14,
> + IOCNUM_GET_MEMSEG = 15,
> + IOCNUM_MMAP_MEMSEG = 16,
> + IOCNUM_MMAP_GETNEXT = 17,
>
> /* register/state accessors */
> IOCNUM_SET_REGISTER = 20,
> @@ -278,10 +294,14 @@ enum {
> _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
> #define VM_REINIT \
> _IO('v', IOCNUM_REINIT)
> -#define VM_MAP_MEMORY \
> - _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
> -#define VM_GET_MEMORY_SEG \
> - _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
> +#define VM_ALLOC_MEMSEG \
> + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
> +#define VM_GET_MEMSEG \
> + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
> +#define VM_MMAP_MEMSEG \
> + _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
> +#define VM_MMAP_GETNEXT \
> + _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
> #define VM_SET_REGISTER \
> _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
> #define VM_GET_REGISTER \
>
> Modified: stable/10/sys/amd64/vmm/amd/svm.c
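At the driver boundary the new ioctls compose the same way the libvmmapi
wrappers do. A hedged sketch using only the structures and ioctl names
defined above; the alloc_and_map_sysmem() helper is hypothetical and 'fd' is
assumed to be an open /dev/vmm/<name> descriptor:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <string.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>

static int
alloc_and_map_sysmem(int fd, size_t len)
{
	struct vm_memseg memseg;
	struct vm_memmap memmap;

	/*
	 * Create the backing object for segment 0 (VM_SYSMEM in vmmapi.h).
	 * System memory segments carry a nul name.
	 */
	memset(&memseg, 0, sizeof(memseg));
	memseg.segid = 0;
	memseg.len = len;
	if (ioctl(fd, VM_ALLOC_MEMSEG, &memseg) != 0)
		return (-1);

	/* Map it 1:1 into the guest address space starting at gpa 0 */
	memset(&memmap, 0, sizeof(memmap));
	memmap.gpa = 0;
	memmap.segid = 0;
	memmap.segoff = 0;
	memmap.len = len;
	memmap.prot = PROT_READ | PROT_WRITE | PROT_EXEC;
	memmap.flags = 0;		/* or VM_MEMMAP_F_WIRED */
	return (ioctl(fd, VM_MMAP_MEMSEG, &memmap));
}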
> ==============================================================================
> --- stable/10/sys/amd64/vmm/amd/svm.c Mon Feb 1 14:28:58 2016 (r295123)
> +++ stable/10/sys/amd64/vmm/amd/svm.c Mon Feb 1 14:56:11 2016 (r295124)
> @@ -1477,7 +1477,7 @@ svm_vmexit(struct svm_softc *svm_sc, int
> VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with "
> "reserved bits set: info1(%#lx) info2(%#lx)",
> info1, info2);
> - } else if (vm_mem_allocated(svm_sc->vm, info2)) {
> + } else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) {
> vmexit->exitcode = VM_EXITCODE_PAGING;
> vmexit->u.paging.gpa = info2;
> vmexit->u.paging.fault_type = npf_fault_type(info1);
>
> Modified: stable/10/sys/amd64/vmm/intel/vmx.c
> ==============================================================================
> --- stable/10/sys/amd64/vmm/intel/vmx.c Mon Feb 1 14:28:58 2016 (r295123)
> +++ stable/10/sys/amd64/vmm/intel/vmx.c Mon Feb 1 14:56:11 2016 (r295124)
> @@ -2426,7 +2426,7 @@ vmx_exit_process(struct vmx *vmx, int vc
> * this must be an instruction that accesses MMIO space.
> */
> gpa = vmcs_gpa();
> - if (vm_mem_allocated(vmx->vm, gpa) ||
> + if (vm_mem_allocated(vmx->vm, vcpu, gpa) ||
> apic_access_fault(vmx, vcpu, gpa)) {
> vmexit->exitcode = VM_EXITCODE_PAGING;
> vmexit->inst_length = 0;
>
> Modified: stable/10/sys/amd64/vmm/io/ppt.c
> ==============================================================================
> --- stable/10/sys/amd64/vmm/io/ppt.c Mon Feb 1 14:28:58 2016 (r295123)
> +++ stable/10/sys/amd64/vmm/io/ppt.c Mon Feb 1 14:56:11 2016 (r295124)
> @@ -76,11 +76,17 @@ struct pptintr_arg { /* pptintr(pptin
> uint64_t msg_data;
> };
>
> +struct pptseg {
> + vm_paddr_t gpa;
> + size_t len;
> + int wired;
> +};
> +
> struct pptdev {
> device_t dev;
> struct vm *vm; /* owner of this device */
> TAILQ_ENTRY(pptdev) next;
> - struct vm_memory_segment mmio[MAX_MMIOSEGS];
> + struct pptseg mmio[MAX_MMIOSEGS];
> struct {
> int num_msgs; /* guest state */
>
> @@ -207,14 +213,14 @@ static void
> ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
> {
> int i;
> - struct vm_memory_segment *seg;
> + struct pptseg *seg;
>
> for (i = 0; i < MAX_MMIOSEGS; i++) {
> seg = &ppt->mmio[i];
> if (seg->len == 0)
> continue;
> (void)vm_unmap_mmio(vm, seg->gpa, seg->len);
> - bzero(seg, sizeof(struct vm_memory_segment));
> + bzero(seg, sizeof(struct pptseg));
> }
> }
>
> @@ -324,7 +330,7 @@ ppt_is_mmio(struct vm *vm, vm_paddr_t gp
> {
> int i;
> struct pptdev *ppt;
> - struct vm_memory_segment *seg;
> + struct pptseg *seg;
>
> TAILQ_FOREACH(ppt, &pptdev_list, next) {
> if (ppt->vm != vm)
> @@ -410,7 +416,7 @@ ppt_map_mmio(struct vm *vm, int bus, int
> vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
> {
> int i, error;
> - struct vm_memory_segment *seg;
> + struct pptseg *seg;
> struct pptdev *ppt;
>
> ppt = ppt_find(bus, slot, func);
>
> Modified: stable/10/sys/amd64/vmm/vmm.c
> ==============================================================================
> --- stable/10/sys/amd64/vmm/vmm.c Mon Feb 1 14:28:58 2016 (r295123)
> +++ stable/10/sys/amd64/vmm/vmm.c Mon Feb 1 14:56:11 2016 (r295124)
> @@ -120,12 +120,21 @@ struct vcpu {
> #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
>
> struct mem_seg {
> + size_t len;
> + bool sysmem;
> + struct vm_object *object;
> +};
> +#define VM_MAX_MEMSEGS 2
> +
> +struct mem_map {
> vm_paddr_t gpa;
> size_t len;
> - boolean_t wired;
> - vm_object_t object;
> + vm_ooffset_t segoff;
> + int segid;
> + int prot;
> + int flags;
> };
> -#define VM_MAX_MEMORY_SEGMENTS 2
> +#define VM_MAX_MEMMAPS 4
>
> /*
> * Initialization:
> @@ -151,8 +160,8 @@ struct vm {
> void *rendezvous_arg; /* (x) rendezvous func/arg */
> vm_rendezvous_func_t rendezvous_func;
> struct mtx rendezvous_mtx; /* (o) rendezvous lock */
> - int num_mem_segs; /* (o) guest memory segments */
> - struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS];
> + struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
> + struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
> struct vmspace *vmspace; /* (o) guest's address space */
> char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
> struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
> @@ -224,6 +233,8 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_f
> SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
> "Force use of I/O MMU even if no passthrough devices were found.");
>
> +static void vm_free_memmap(struct vm *vm, int ident);
> +static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
> static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
>
> #ifdef KTR
> @@ -444,7 +455,6 @@ vm_create(const char *name, struct vm **
>
> vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
> strcpy(vm->name, name);
> - vm->num_mem_segs = 0;
> vm->vmspace = vmspace;
> mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
>
> @@ -455,18 +465,9 @@ vm_create(const char *name, struct vm **
> }
>
> static void
> -vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
> -{
> -
> - if (seg->object != NULL)
> - vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
> -
> - bzero(seg, sizeof(*seg));
> -}
> -
> -static void
> vm_cleanup(struct vm *vm, bool destroy)
> {
> + struct mem_map *mm;
> int i;
>
> ppt_unassign_all(vm);
> @@ -489,11 +490,23 @@ vm_cleanup(struct vm *vm, bool destroy)
>
> VMCLEANUP(vm->cookie);
>
> - if (destroy) {
> - for (i = 0; i < vm->num_mem_segs; i++)
> - vm_free_mem_seg(vm, &vm->mem_segs[i]);
> + /*
> + * System memory is removed from the guest address space only when
> + * the VM is destroyed. This is because the mapping remains the same
> + * across VM reset.
> + *
> + * Device memory can be relocated by the guest (e.g. using PCI BARs)
> + * so those mappings are removed on a VM reset.
> + */
> + for (i = 0; i < VM_MAX_MEMMAPS; i++) {
> + mm = &vm->mem_maps[i];
> + if (destroy || !sysmem_mapping(vm, mm))
> + vm_free_memmap(vm, i);
> + }
>
> - vm->num_mem_segs = 0;
> + if (destroy) {
> + for (i = 0; i < VM_MAX_MEMSEGS; i++)
> + vm_free_memseg(vm, i);
>
> VMSPACE_FREE(vm->vmspace);
> vm->vmspace = NULL;
> @@ -551,146 +564,243 @@ vm_unmap_mmio(struct vm *vm, vm_paddr_t
> return (0);
> }
>
> -boolean_t
> -vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
> +/*
> + * Return 'true' if 'gpa' is allocated in the guest address space.
> + *
> + * This function is called in the context of a running vcpu which acts as
> + * an implicit lock on 'vm->mem_maps[]'.
> + */
> +bool
> +vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
> {
> + struct mem_map *mm;
> int i;
> - vm_paddr_t gpabase, gpalimit;
>
> - for (i = 0; i < vm->num_mem_segs; i++) {
> - gpabase = vm->mem_segs[i].gpa;
> - gpalimit = gpabase + vm->mem_segs[i].len;
> - if (gpa >= gpabase && gpa < gpalimit)
> - return (TRUE); /* 'gpa' is regular memory */
> +#ifdef INVARIANTS
> + int hostcpu, state;
> + state = vcpu_get_state(vm, vcpuid, &hostcpu);
> + KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
> + ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
> +#endif
> +
> + for (i = 0; i < VM_MAX_MEMMAPS; i++) {
> + mm = &vm->mem_maps[i];
> + if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
> + return (true); /* 'gpa' is sysmem or devmem */
> }
>
> if (ppt_is_mmio(vm, gpa))
> - return (TRUE); /* 'gpa' is pci passthru mmio */
> + return (true); /* 'gpa' is pci passthru mmio */
>
> - return (FALSE);
> + return (false);
> }
>
> int
> -vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
> +vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
> {
> - int available, allocated;
> struct mem_seg *seg;
> - vm_object_t object;
> - vm_paddr_t g;
> + vm_object_t obj;
>
> - if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
> + if (ident < 0 || ident >= VM_MAX_MEMSEGS)
> return (EINVAL);
> -
> - available = allocated = 0;
> - g = gpa;
> - while (g < gpa + len) {
> - if (vm_mem_allocated(vm, g))
> - allocated++;
> - else
> - available++;
>
> - g += PAGE_SIZE;
> - }
> -
> - /*
> - * If there are some allocated and some available pages in the address
> - * range then it is an error.
> - */
> - if (allocated && available)
> + if (len == 0 || (len & PAGE_MASK))
> return (EINVAL);
>
> - /*
> - * If the entire address range being requested has already been
> - * allocated then there isn't anything more to do.
> - */
> - if (allocated && available == 0)
> - return (0);
> -
> - if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
> - return (E2BIG);
> -
> - seg = &vm->mem_segs[vm->num_mem_segs];
> + seg = &vm->mem_segs[ident];
> + if (seg->object != NULL) {
> + if (seg->len == len && seg->sysmem == sysmem)
> + return (EEXIST);
> + else
> + return (EINVAL);
> + }
>
> - if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
> + obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
> + if (obj == NULL)
> return (ENOMEM);
>
> - seg->gpa = gpa;
> seg->len = len;
> - seg->object = object;
> - seg->wired = FALSE;
> + seg->object = obj;
> + seg->sysmem = sysmem;
> + return (0);
> +}
>
> - vm->num_mem_segs++;
> +int
> +vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
> + vm_object_t *objptr)
> +{
> + struct mem_seg *seg;
> +
> + if (ident < 0 || ident >= VM_MAX_MEMSEGS)
> + return (EINVAL);
>
> + seg = &vm->mem_segs[ident];
> + if (len)
> + *len = seg->len;
> + if (sysmem)
> + *sysmem = seg->sysmem;
> + if (objptr)
> + *objptr = seg->object;
> return (0);
> }
>
> -static vm_paddr_t
> -vm_maxmem(struct vm *vm)
> +void
> +vm_free_memseg(struct vm *vm, int ident)
> {
> - int i;
> - vm_paddr_t gpa, maxmem;
> + struct mem_seg *seg;
>
> - maxmem = 0;
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
--
Homepage: www.yamagi.org
XMPP: yamagi at yamagi.org
GnuPG/GPG: 0xEFBCCBCB