svn commit: r189699 - in head/sys: . amd64/amd64 amd64/conf
amd64/include amd64/include/xen conf contrib/pf dev/ata
dev/cxgb dev/sound/usb dev/usb dev/xen/balloon
dev/xen/blkfront dev/xen/console d...
Doug Rabson
dfr at FreeBSD.org
Wed Mar 11 08:30:14 PDT 2009
Author: dfr
Date: Wed Mar 11 15:30:12 2009
New Revision: 189699
URL: http://svn.freebsd.org/changeset/base/189699
Log:
Merge in support for Xen HVM on amd64 architecture.
Added:
head/sys/amd64/conf/XENHVM (contents, props changed)
- copied, changed from r189614, user/dfr/xenhvm/7/sys/amd64/conf/XENHVM
head/sys/amd64/include/xen/ (props changed)
- copied from r189614, user/dfr/xenhvm/7/sys/amd64/include/xen/
head/sys/dev/xen/xenpci/ (props changed)
- copied from r189614, user/dfr/xenhvm/7/sys/dev/xen/xenpci/
head/sys/xen/features.h
- copied unchanged from r189614, user/dfr/xenhvm/7/sys/xen/features.h
head/sys/xen/reboot.c
- copied unchanged from r189614, user/dfr/xenhvm/7/sys/xen/reboot.c
Modified:
head/sys/ (props changed)
head/sys/amd64/amd64/machdep.c
head/sys/amd64/include/pcpu.h
head/sys/amd64/include/xen/xenvar.h
head/sys/conf/files
head/sys/conf/options.amd64
head/sys/contrib/pf/ (props changed)
head/sys/dev/ata/ata-usb.c (props changed)
head/sys/dev/cxgb/ (props changed)
head/sys/dev/sound/usb/uaudio.c (props changed)
head/sys/dev/sound/usb/uaudio.h (props changed)
head/sys/dev/sound/usb/uaudio_pcm.c (props changed)
head/sys/dev/sound/usb/uaudioreg.h (props changed)
head/sys/dev/usb/usb.h (props changed)
head/sys/dev/usb/usb_if.m (props changed)
head/sys/dev/usb/usbdevs (props changed)
head/sys/dev/usb/usbhid.h (props changed)
head/sys/dev/xen/balloon/balloon.c
head/sys/dev/xen/blkfront/blkfront.c
head/sys/dev/xen/console/console.c
head/sys/dev/xen/console/xencons_ring.c
head/sys/dev/xen/netfront/ (props changed)
head/sys/dev/xen/netfront/netfront.c
head/sys/dev/xen/xenpci/machine_reboot.c
head/sys/i386/include/xen/xenpmap.h
head/sys/xen/evtchn.h (props changed)
head/sys/xen/evtchn/evtchn.c
head/sys/xen/evtchn/evtchn_dev.c
head/sys/xen/features.c
head/sys/xen/gnttab.c
head/sys/xen/gnttab.h
head/sys/xen/hypervisor.h (contents, props changed)
head/sys/xen/interface/arch-x86/xen.h
head/sys/xen/interface/hvm/params.h
head/sys/xen/xen_intr.h (contents, props changed)
head/sys/xen/xenbus/xenbus_probe.c
head/sys/xen/xenbus/xenbus_xs.c
Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c Wed Mar 11 14:55:04 2009 (r189698)
+++ head/sys/amd64/amd64/machdep.c Wed Mar 11 15:30:12 2009 (r189699)
@@ -1494,6 +1494,14 @@ hammer_time(u_int64_t modulep, u_int64_t
if (env != NULL)
strlcpy(kernelname, env, sizeof(kernelname));
+#ifdef XENHVM
+ if (inw(0x10) == 0x49d2) {
+ if (bootverbose)
+ printf("Xen detected: disabling emulated block and network devices\n");
+ outw(0x10, 3);
+ }
+#endif
+
/* Location of kernel stack for locore */
return ((u_int64_t)thread0.td_pcb);
}
Copied and modified: head/sys/amd64/conf/XENHVM (from r189614, user/dfr/xenhvm/7/sys/amd64/conf/XENHVM)
==============================================================================
--- user/dfr/xenhvm/7/sys/amd64/conf/XENHVM Tue Mar 10 10:59:30 2009 (r189614, copy source)
+++ head/sys/amd64/conf/XENHVM Wed Mar 11 15:30:12 2009 (r189699)
@@ -1,8 +1,8 @@
#
# XENHVM -- Xen HVM kernel configuration file for FreeBSD/amd64
#
-# For more information on this file, please read the handbook section on
-# Kernel Configuration Files:
+# For more information on this file, please read the config(5) manual page,
+# and/or the handbook section on Kernel Configuration Files:
#
# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
#
@@ -19,11 +19,17 @@
# $FreeBSD$
cpu HAMMER
-ident XENHVM
+ident GENERIC
# To statically compile in device wiring instead of /boot/device.hints
#hints "GENERIC.hints" # Default places to look for devices.
+# Use the following to compile in values accessible to the kernel
+# through getenv() (or kenv(1) in userland). The format of the file
+# is 'variable=value', see kenv(1)
+#
+# env "GENERIC.env"
+
makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
makeoptions MODULES_OVERRIDE=""
@@ -31,7 +37,7 @@ options SCHED_ULE # ULE scheduler
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
options INET6 # IPv6 communications protocols
-options SCTP # Stream Control Transmission Protocol
+options SCTP # Stream Control Transmission Protocol
options FFS # Berkeley Fast Filesystem
options SOFTUPDATES # Enable FFS soft updates support
options UFS_ACL # Support for access control lists
@@ -42,18 +48,18 @@ options NFSCLIENT # Network Filesystem
options NFSSERVER # Network Filesystem Server
options NFSLOCKD # Network Lock Manager
options NFS_ROOT # NFS usable as /, requires NFSCLIENT
-options NTFS # NT File System
options MSDOSFS # MSDOS Filesystem
options CD9660 # ISO 9660 Filesystem
options PROCFS # Process filesystem (requires PSEUDOFS)
options PSEUDOFS # Pseudo-filesystem framework
options GEOM_PART_GPT # GUID Partition Tables.
options GEOM_LABEL # Provides labelization
-options COMPAT_43TTY # BSD 4.3 TTY compat [KEEP THIS!]
+options COMPAT_43TTY # BSD 4.3 TTY compat (sgtty)
options COMPAT_IA32 # Compatible with i386 binaries
options COMPAT_FREEBSD4 # Compatible with FreeBSD4
options COMPAT_FREEBSD5 # Compatible with FreeBSD5
options COMPAT_FREEBSD6 # Compatible with FreeBSD6
+options COMPAT_FREEBSD7 # Compatible with FreeBSD7
options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI
options KTRACE # ktrace(1) support
options STACK # stack(9) support
@@ -62,15 +68,20 @@ options SYSVMSG # SYSV-style message
options SYSVSEM # SYSV-style semaphores
options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options ADAPTIVE_GIANT # Giant mutex is adaptive.
options STOP_NMI # Stop CPUS using NMI instead of IPI
+options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4)
options AUDIT # Security event auditing
#options KDTRACE_FRAME # Ensure frames are compiled in
#options KDTRACE_HOOKS # Kernel DTrace hooks
-options KDB
-options DDB
-options GDB
+# Debugging for use in -current
+options KDB # Enable kernel debugger support.
+options DDB # Support DDB.
+options GDB # Support remote GDB.
+options INVARIANTS # Enable calls of extra sanity checking
+options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
+options WITNESS # Enable checks to detect deadlocks and cycles
+options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
# Make an SMP-capable kernel by default
options SMP # Symmetric MultiProcessor Kernel
@@ -107,6 +118,7 @@ device cd # CD
device pass # Passthrough device (direct SCSI access)
device ses # SCSI Environmental Services (and SAF-TE)
+
# atkbdc0 controls both the keyboard and the PS/2 mouse
device atkbdc # AT keyboard controller
device atkbd # AT keyboard
@@ -124,7 +136,6 @@ device sc
device agp # support several AGP chipsets
# Serial (COM) ports
-device sio # 8250, 16[45]50 based serial ports
device uart # Generic UART driver
# PCI Ethernet NICs that use the common MII bus controller code.
@@ -136,10 +147,8 @@ device re # RealTek 8139C+/8169/8169S/
device loop # Network loopback
device random # Entropy device
device ether # Ethernet support
-device sl # Kernel SLIP
-device ppp # Kernel PPP
device tun # Packet tunnel.
-device pty # Pseudo-ttys (telnet etc)
+device pty # BSD-style compatibility pseudo ttys
device md # Memory "disks"
device gif # IPv6 and IPv4 tunneling
device faith # IPv6-to-IPv4 relaying (translation)
@@ -149,4 +158,3 @@ device firmware # firmware assist modul
# Be aware of the administrative consequences of enabling this!
# Note that 'bpf' is required for DHCP.
device bpf # Berkeley packet filter
-
Modified: head/sys/amd64/include/pcpu.h
==============================================================================
--- head/sys/amd64/include/pcpu.h Wed Mar 11 14:55:04 2009 (r189698)
+++ head/sys/amd64/include/pcpu.h Wed Mar 11 15:30:12 2009 (r189699)
@@ -33,6 +33,24 @@
#error "sys/cdefs.h is a prerequisite for this file"
#endif
+#if defined(XEN) || defined(XENHVM)
+#ifndef NR_VIRQS
+#define NR_VIRQS 24
+#endif
+#ifndef NR_IPIS
+#define NR_IPIS 2
+#endif
+#endif
+
+#ifdef XENHVM
+#define PCPU_XEN_FIELDS \
+ ; \
+ unsigned int pc_last_processed_l1i; \
+ unsigned int pc_last_processed_l2i
+#else
+#define PCPU_XEN_FIELDS
+#endif
+
/*
* The SMP parts are setup in pmap.c and locore.s for the BSP, and
* mp_machdep.c sets up the data for the AP's to "see" when they awake.
@@ -49,7 +67,8 @@
register_t pc_scratch_rsp; /* User %rsp in syscall */ \
u_int pc_apic_id; \
u_int pc_acpi_id; /* ACPI CPU id */ \
- struct user_segment_descriptor *pc_gs32p
+ struct user_segment_descriptor *pc_gs32p \
+ PCPU_XEN_FIELDS
#ifdef _KERNEL
Modified: head/sys/amd64/include/xen/xenvar.h
==============================================================================
--- user/dfr/xenhvm/7/sys/amd64/include/xen/xenvar.h Tue Mar 10 10:59:30 2009 (r189614)
+++ head/sys/amd64/include/xen/xenvar.h Wed Mar 11 15:30:12 2009 (r189699)
@@ -71,6 +71,7 @@ machtophys(vm_paddr_t ma)
#define MFNTOPFN(ma) (ma)
#define set_phys_to_machine(pfn, mfn) ((void)0)
+#define phys_to_machine_mapping_valid(pfn) (TRUE)
#define PT_UPDATES_FLUSH() ((void)0)
#else
Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Wed Mar 11 14:55:04 2009 (r189698)
+++ head/sys/conf/files Wed Mar 11 15:30:12 2009 (r189699)
@@ -2758,21 +2758,24 @@ gnu/fs/xfs/xfs_iomap.c optional xfs \
gnu/fs/xfs/xfs_behavior.c optional xfs \
compile-with "${NORMAL_C} -I$S/gnu/fs/xfs/FreeBSD -I$S/gnu/fs/xfs/FreeBSD/support -I$S/gnu/fs/xfs"
-xen/gnttab.c optional xen
-xen/features.c optional xen
-xen/evtchn/evtchn.c optional xen
-xen/evtchn/evtchn_dev.c optional xen
-xen/xenbus/xenbus_client.c optional xen
-xen/xenbus/xenbus_comms.c optional xen
-xen/xenbus/xenbus_dev.c optional xen
-xen/xenbus/xenbus_if.m optional xen
-xen/xenbus/xenbus_probe.c optional xen
-#xen/xenbus/xenbus_probe_backend.c optional xen
-xen/xenbus/xenbus_xs.c optional xen
-dev/xen/console/console.c optional xen
-dev/xen/console/xencons_ring.c optional xen
-dev/xen/blkfront/blkfront.c optional xen
-dev/xen/netfront/netfront.c optional xen
-#dev/xen/xenpci/xenpci.c optional xen
-#xen/xenbus/xenbus_newbus.c optional xenhvm
+xen/gnttab.c optional xen | xenhvm
+xen/features.c optional xen | xenhvm
+xen/evtchn/evtchn.c optional xen
+xen/evtchn/evtchn_dev.c optional xen | xenhvm
+xen/reboot.c optional xen
+xen/xenbus/xenbus_client.c optional xen | xenhvm
+xen/xenbus/xenbus_comms.c optional xen | xenhvm
+xen/xenbus/xenbus_dev.c optional xen | xenhvm
+xen/xenbus/xenbus_if.m optional xen | xenhvm
+xen/xenbus/xenbus_probe.c optional xen | xenhvm
+#xen/xenbus/xenbus_probe_backend.c optional xen
+xen/xenbus/xenbus_xs.c optional xen | xenhvm
+dev/xen/balloon/balloon.c optional xen | xenhvm
+dev/xen/console/console.c optional xen
+dev/xen/console/xencons_ring.c optional xen
+dev/xen/blkfront/blkfront.c optional xen | xenhvm
+dev/xen/netfront/netfront.c optional xen | xenhvm
+dev/xen/xenpci/xenpci.c optional xenpci
+dev/xen/xenpci/evtchn.c optional xenpci
+dev/xen/xenpci/machine_reboot.c optional xenpci
Modified: head/sys/conf/options.amd64
==============================================================================
--- head/sys/conf/options.amd64 Wed Mar 11 14:55:04 2009 (r189698)
+++ head/sys/conf/options.amd64 Wed Mar 11 15:30:12 2009 (r189699)
@@ -57,3 +57,5 @@ KDTRACE_FRAME opt_kdtrace.h
# BPF just-in-time compiler
BPF_JITTER opt_bpf.h
+
+XENHVM opt_global.h
Modified: head/sys/dev/xen/balloon/balloon.c
==============================================================================
--- head/sys/dev/xen/balloon/balloon.c Wed Mar 11 14:55:04 2009 (r189698)
+++ head/sys/dev/xen/balloon/balloon.c Wed Mar 11 15:30:12 2009 (r189699)
@@ -34,11 +34,24 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/sysctl.h>
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen-os.h>
-#include <machine/xenbus.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenfunc.h>
+#include <machine/xen/xenvar.h>
+#include <xen/hypervisor.h>
+#include <xen/xenbus/xenbusvar.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
+
+struct mtx balloon_mutex;
/*
* Protects atomic reservation decrease/increase against concurrent increases.
@@ -46,23 +59,44 @@ __FBSDID("$FreeBSD$");
* balloon lists.
*/
struct mtx balloon_lock;
-#ifdef notyet
-
-/* We aim for 'current allocation' == 'target allocation'. */
-static unsigned long current_pages;
-static unsigned long target_pages;
-/* VM /proc information for memory */
-extern unsigned long totalram_pages;
+/* We increase/decrease in batches which fit in a page */
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
+#define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0]))
+
+struct balloon_stats {
+ /* We aim for 'current allocation' == 'target allocation'. */
+ unsigned long current_pages;
+ unsigned long target_pages;
+ /* We may hit the hard limit in Xen. If we do then we remember it. */
+ unsigned long hard_limit;
+ /*
+ * Drivers may alter the memory reservation independently, but they
+ * must inform the balloon driver so we avoid hitting the hard limit.
+ */
+ unsigned long driver_pages;
+ /* Number of pages in high- and low-memory balloons. */
+ unsigned long balloon_low;
+ unsigned long balloon_high;
+};
-/* We may hit the hard limit in Xen. If we do then we remember it. */
-static unsigned long hard_limit;
+static struct balloon_stats balloon_stats;
+#define bs balloon_stats
-/*
- * Drivers may alter the memory reservation independently, but they must
- * inform the balloon driver so that we can avoid hitting the hard limit.
- */
-static unsigned long driver_pages;
+SYSCTL_DECL(_dev_xen);
+SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD,
+ &bs.current_pages, 0, "Current allocation");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD,
+ &bs.target_pages, 0, "Target allocation");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD,
+ &bs.driver_pages, 0, "Driver pages");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD,
+ &bs.hard_limit, 0, "Xen hard limit");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD,
+ &bs.balloon_low, 0, "Low-mem balloon");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
+ &bs.balloon_high, 0, "High-mem balloon");
struct balloon_entry {
vm_page_t page;
@@ -72,9 +106,6 @@ struct balloon_entry {
/* List of ballooned pages, threaded through the mem_map array. */
static STAILQ_HEAD(,balloon_entry) ballooned_pages;
-static unsigned long balloon_low, balloon_high;
-
-
/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
@@ -89,10 +120,10 @@ balloon_append(vm_page_t page)
{
struct balloon_entry *entry;
- entry = malloc(sizeof(struct balloon_entry), M_WAITOK);
-
+ entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
+ entry->page = page;
STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
- balloon_low++;
+ bs.balloon_low++;
}
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -111,13 +142,13 @@ balloon_retrieve(void)
page = entry->page;
free(entry, M_DEVBUF);
- balloon_low--;
+ bs.balloon_low--;
return page;
}
static void
-balloon_alarm(unsigned long unused)
+balloon_alarm(void *unused)
{
wakeup(balloon_process);
}
@@ -125,17 +156,56 @@ balloon_alarm(unsigned long unused)
static unsigned long
current_target(void)
{
- unsigned long target = min(target_pages, hard_limit);
- if (target > (current_pages + balloon_low + balloon_high))
- target = current_pages + balloon_low + balloon_high;
+ unsigned long target = min(bs.target_pages, bs.hard_limit);
+ if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
+ target = bs.current_pages + bs.balloon_low + bs.balloon_high;
return target;
}
+static unsigned long
+minimum_target(void)
+{
+#ifdef XENHVM
+#define max_pfn physmem
+#endif
+ unsigned long min_pages, curr_pages = current_target();
+
+#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
+ /* Simple continuous piecewiese linear function:
+ * max MiB -> min MiB gradient
+ * 0 0
+ * 16 16
+ * 32 24
+ * 128 72 (1/2)
+ * 512 168 (1/4)
+ * 2048 360 (1/8)
+ * 8192 552 (1/32)
+ * 32768 1320
+ * 131072 4392
+ */
+ if (max_pfn < MB2PAGES(128))
+ min_pages = MB2PAGES(8) + (max_pfn >> 1);
+ else if (max_pfn < MB2PAGES(512))
+ min_pages = MB2PAGES(40) + (max_pfn >> 2);
+ else if (max_pfn < MB2PAGES(2048))
+ min_pages = MB2PAGES(104) + (max_pfn >> 3);
+ else
+ min_pages = MB2PAGES(296) + (max_pfn >> 5);
+#undef MB2PAGES
+
+ /* Don't enforce growth */
+ return min(min_pages, curr_pages);
+#ifndef CONFIG_XEN
+#undef max_pfn
+#endif
+}
+
static int
increase_reservation(unsigned long nr_pages)
{
- unsigned long *mfn_list, pfn, i, flags;
- struct page *page;
+ unsigned long pfn, i;
+ struct balloon_entry *entry;
+ vm_page_t page;
long rc;
struct xen_memory_reservation reservation = {
.address_bits = 0,
@@ -143,64 +213,81 @@ increase_reservation(unsigned long nr_pa
.domid = DOMID_SELF
};
- if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
- nr_pages = PAGE_SIZE / sizeof(unsigned long);
+ if (nr_pages > ARRAY_SIZE(frame_list))
+ nr_pages = ARRAY_SIZE(frame_list);
- mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
- if (mfn_list == NULL)
- return ENOMEM;
+ mtx_lock(&balloon_lock);
+ for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
+ i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
+ KASSERT(entry, ("ballooned_pages list corrupt"));
+ page = entry->page;
+ frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
+ }
- reservation.extent_start = mfn_list;
+ set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
rc = HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation);
+ XENMEM_populate_physmap, &reservation);
if (rc < nr_pages) {
- int ret;
- /* We hit the Xen hard limit: reprobe. */
- reservation.extent_start = mfn_list;
- reservation.nr_extents = rc;
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation);
- PANIC_IF(ret != rc);
- hard_limit = current_pages + rc - driver_pages;
+ if (rc > 0) {
+ int ret;
+
+ /* We hit the Xen hard limit: reprobe. */
+ reservation.nr_extents = rc;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ KASSERT(ret == rc, ("HYPERVISOR_memory_op failed"));
+ }
+ if (rc >= 0)
+ bs.hard_limit = (bs.current_pages + rc -
+ bs.driver_pages);
goto out;
}
for (i = 0; i < nr_pages; i++) {
page = balloon_retrieve();
- PANIC_IF(page == NULL);
+ KASSERT(page, ("balloon_retrieve failed"));
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
- PANIC_IF(phys_to_machine_mapping_valid(pfn));
+ KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
+ !phys_to_machine_mapping_valid(pfn)),
+ ("auto translated physmap but mapping is valid"));
+
+ set_phys_to_machine(pfn, frame_list[i]);
+
+#ifndef XENHVM
+ /* Link back into the page tables if not highmem. */
+ if (pfn < max_low_pfn) {
+ int ret;
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ pfn_pte_ma(frame_list[i], PAGE_KERNEL),
+ 0);
+ PASSING(ret == 0,
+ ("HYPERVISOR_update_va_mapping failed"));
+ }
+#endif
- /* Update P->M and M->P tables. */
- PFNTOMFN(pfn) = mfn_list[i];
- xen_machphys_update(mfn_list[i], pfn);
-
/* Relinquish the page back to the allocator. */
- ClearPageReserved(page);
- set_page_count(page, 1);
+ vm_page_unwire(page, 0);
vm_page_free(page);
}
- current_pages += nr_pages;
- totalram_pages = current_pages;
+ bs.current_pages += nr_pages;
+ //totalram_pages = bs.current_pages;
out:
- balloon_unlock(flags);
-
- free((mfn_list);
+ mtx_unlock(&balloon_lock);
return 0;
}
-static int
+static int
decrease_reservation(unsigned long nr_pages)
{
- unsigned long *mfn_list, pfn, i, flags;
- struct page *page;
- void *v;
+ unsigned long pfn, i;
+ vm_page_t page;
int need_sleep = 0;
int ret;
struct xen_memory_reservation reservation = {
@@ -209,48 +296,68 @@ decrease_reservation(unsigned long nr_pa
.domid = DOMID_SELF
};
- if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
- nr_pages = PAGE_SIZE / sizeof(unsigned long);
-
- mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
- if (mfn_list == NULL)
- return ENOMEM;
+ if (nr_pages > ARRAY_SIZE(frame_list))
+ nr_pages = ARRAY_SIZE(frame_list);
for (i = 0; i < nr_pages; i++) {
int color = 0;
if ((page = vm_page_alloc(NULL, color++,
- VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
- VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+ VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
nr_pages = i;
need_sleep = 1;
break;
}
+
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
- mfn_list[i] = PFNTOMFN(pfn);
+ frame_list[i] = PFNTOMFN(pfn);
+
+#if 0
+ if (!PageHighMem(page)) {
+ v = phys_to_virt(pfn << PAGE_SHIFT);
+ scrub_pages(v, 1);
+#ifdef CONFIG_XEN
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)v, __pte_ma(0), 0);
+ BUG_ON(ret);
+#endif
+ }
+#endif
+#ifdef CONFIG_XEN_SCRUB_PAGES
+ else {
+ v = kmap(page);
+ scrub_pages(v, 1);
+ kunmap(page);
+ }
+#endif
}
- balloon_lock(flags);
+#ifdef CONFIG_XEN
+ /* Ensure that ballooned highmem pages don't have kmaps. */
+ kmap_flush_unused();
+ flush_tlb_all();
+#endif
+
+ mtx_lock(&balloon_lock);
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
- pfn = MFNTOPFN(mfn_list[i]);
- PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
+ pfn = MFNTOPFN(frame_list[i]);
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
}
- reservation.extent_start = mfn_list;
+ set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
- PANIC_IF(ret != nr_pages);
+ KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
- current_pages -= nr_pages;
- totalram_pages = current_pages;
-
- balloon_unlock(flags);
+ bs.current_pages -= nr_pages;
+ //totalram_pages = bs.current_pages;
- free(mfn_list, M_DEVBUF);
+ mtx_unlock(&balloon_lock);
- return need_sleep;
+ return (need_sleep);
}
/*
@@ -265,27 +372,24 @@ balloon_process(void *unused)
int need_sleep = 0;
long credit;
+ mtx_lock(&balloon_mutex);
for (;;) {
do {
- credit = current_target() - current_pages;
+ credit = current_target() - bs.current_pages;
if (credit > 0)
need_sleep = (increase_reservation(credit) != 0);
if (credit < 0)
need_sleep = (decrease_reservation(-credit) != 0);
-#ifndef CONFIG_PREEMPT
- if (need_resched())
- schedule();
-#endif
} while ((credit != 0) && !need_sleep);
/* Schedule more work if there is some still to be done. */
- if (current_target() != current_pages)
- timeout(balloon_alarm, NULL, ticks + HZ);
+ if (current_target() != bs.current_pages)
+ timeout(balloon_alarm, NULL, ticks + hz);
- msleep(balloon_process, balloon_lock, 0, "balloon", -1);
+ msleep(balloon_process, &balloon_mutex, 0, "balloon", -1);
}
-
+ mtx_unlock(&balloon_mutex);
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
@@ -293,8 +397,8 @@ static void
set_new_target(unsigned long target)
{
/* No need for lock. Not read-modify-write updates. */
- hard_limit = ~0UL;
- target_pages = target;
+ bs.hard_limit = ~0UL;
+ bs.target_pages = max(target, minimum_target());
wakeup(balloon_process);
}
@@ -311,8 +415,9 @@ watch_target(struct xenbus_watch *watch,
unsigned long long new_target;
int err;
- err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
- if (err != 1) {
+ err = xenbus_scanf(XBT_NIL, "memory", "target", NULL,
+ "%llu", &new_target);
+ if (err) {
/* This is ok (for domain0 at least) - so just return */
return;
}
@@ -325,7 +430,7 @@ watch_target(struct xenbus_watch *watch,
}
static void
-balloon_init_watcher(void *)
+balloon_init_watcher(void *arg)
{
int err;
@@ -334,48 +439,60 @@ balloon_init_watcher(void *)
printf("Failed to set balloon watcher\n");
}
+SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ balloon_init_watcher, NULL);
static void
-balloon_init(void *)
+balloon_init(void *arg)
{
- unsigned long pfn;
- struct page *page;
+#ifndef XENHVM
+ vm_page_t page;
+#endif
- IPRINTK("Initialising balloon driver.\n");
+ if (!is_running_on_xen())
+ return;
- if (xen_init() < 0)
- return -1;
-
- current_pages = min(xen_start_info->nr_pages, max_pfn);
- target_pages = current_pages;
- balloon_low = 0;
- balloon_high = 0;
- driver_pages = 0UL;
- hard_limit = ~0UL;
-
- init_timer(&balloon_timer);
- balloon_timer.data = 0;
- balloon_timer.function = balloon_alarm;
+ mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
+ mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
+
+#ifndef XENHVM
+ bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
+#else
+ bs.current_pages = physmem;
+#endif
+ bs.target_pages = bs.current_pages;
+ bs.balloon_low = 0;
+ bs.balloon_high = 0;
+ bs.driver_pages = 0UL;
+ bs.hard_limit = ~0UL;
+
+ kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon");
+// init_timer(&balloon_timer);
+// balloon_timer.data = 0;
+// balloon_timer.function = balloon_alarm;
+#ifndef XENHVM
/* Initialise the balloon with excess memory space. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
balloon_append(page);
}
+#endif
target_watch.callback = watch_target;
- return 0;
+ return;
}
+SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL);
+
+void balloon_update_driver_allowance(long delta);
void
balloon_update_driver_allowance(long delta)
{
- unsigned long flags;
-
- balloon_lock(flags);
- driver_pages += delta;
- balloon_unlock(flags);
+ mtx_lock(&balloon_lock);
+ bs.driver_pages += delta;
+ mtx_unlock(&balloon_lock);
}
#if 0
@@ -393,17 +510,18 @@ static int dealloc_pte_fn(
set_pte_at(&init_mm, addr, pte, __pte_ma(0));
set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
- PANIC_IF(ret != 1);
+ KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
return 0;
}
#endif
+
+#if 0
vm_page_t
balloon_alloc_empty_page_range(unsigned long nr_pages)
{
- unsigned long flags;
vm_page_t pages;
- int i;
+ int i, rc;
unsigned long *mfn_list;
struct xen_memory_reservation reservation = {
.address_bits = 0,
@@ -422,7 +540,9 @@ balloon_alloc_empty_page_range(unsigned
PFNTOMFN(i) = INVALID_P2M_ENTRY;
reservation.extent_start = mfn_list;
reservation.nr_extents = nr_pages;
- PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != nr_pages);
+ rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));
}
current_pages -= nr_pages;
@@ -435,12 +555,11 @@ balloon_alloc_empty_page_range(unsigned
void
balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
{
- unsigned long i, flags;
+ unsigned long i;
for (i = 0; i < nr_pages; i++)
balloon_append(page + i);
wakeup(balloon_process);
}
-
#endif
Modified: head/sys/dev/xen/blkfront/blkfront.c
==============================================================================
--- head/sys/dev/xen/blkfront/blkfront.c Wed Mar 11 14:55:04 2009 (r189698)
+++ head/sys/dev/xen/blkfront/blkfront.c Wed Mar 11 15:30:12 2009 (r189699)
@@ -40,17 +40,17 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>
-#include <xen/hypervisor.h>
#include <machine/xen/xen-os.h>
+#include <machine/xen/xenfunc.h>
+#include <xen/hypervisor.h>
#include <xen/xen_intr.h>
#include <xen/evtchn.h>
+#include <xen/gnttab.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus/xenbusvar.h>
#include <geom/geom_disk.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/gnttab.h>
#include <dev/xen/blkfront/block.h>
@@ -106,7 +106,7 @@ static char * blkif_status_name[] = {
#endif
#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
#if 0
-#define DPRINTK(fmt, args...) printf("[XEN] %s:%d" fmt ".\n", __FUNCTION__, __LINE__,##args)
+#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif
@@ -138,7 +138,6 @@ pfn_to_mfn(vm_paddr_t pfn)
return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
}
-
/*
* Translate Linux major/minor to an appropriate name and unit
* number. For HVM guests, this allows us to use the same drive names
@@ -323,17 +322,17 @@ blkfront_probe(device_t dev)
static int
blkfront_attach(device_t dev)
{
- int err, vdevice, i, unit;
+ int error, vdevice, i, unit;
struct blkfront_info *info;
const char *name;
/* FIXME: Use dynamic device id if this is not set. */
- err = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
+ error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
"virtual-device", NULL, "%i", &vdevice);
- if (err) {
- xenbus_dev_fatal(dev, err, "reading virtual-device");
+ if (error) {
+ xenbus_dev_fatal(dev, error, "reading virtual-device");
printf("couldn't find virtual device");
- return (err);
+ return (error);
}
blkfront_vdevice_to_unit(vdevice, &unit, &name);
@@ -362,9 +361,22 @@ blkfront_attach(device_t dev)
/* Front end dir is a number, which is used as the id. */
info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);
- err = talk_to_backend(dev, info);
- if (err)
- return (err);
+ error = talk_to_backend(dev, info);
+ if (error)
+ return (error);
+
+ return (0);
+}
+
+static int
+blkfront_suspend(device_t dev)
+{
+ struct blkfront_info *info = device_get_softc(dev);
+
+ /* Prevent new requests being issued until we fix things up. */
+ mtx_lock(&blkif_io_lock);
+ info->connected = BLKIF_STATE_SUSPENDED;
+ mtx_unlock(&blkif_io_lock);
return (0);
}
@@ -375,16 +387,14 @@ blkfront_resume(device_t dev)
struct blkfront_info *info = device_get_softc(dev);
int err;
- DPRINTK("blkfront_resume: %s\n", dev->nodename);
+ DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
blkif_free(info, 1);
-
err = talk_to_backend(dev, info);
-
if (info->connected == BLKIF_STATE_SUSPENDED && !err)
blkif_recover(info);
- return err;
+ return (err);
}
/* Common code used when first setting up, and when resuming. */
@@ -425,6 +435,7 @@ talk_to_backend(device_t dev, struct blk
message = "writing protocol";
goto abort_transaction;
}
+
err = xenbus_transaction_end(xbt, 0);
if (err) {
if (err == EAGAIN)
@@ -462,8 +473,8 @@ setup_blkring(device_t dev, struct blkfr
SHARED_RING_INIT(sring);
FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
- error = xenbus_grant_ring(dev, (vtomach(info->ring.sring) >> PAGE_SHIFT),
- &info->ring_ref);
+ error = xenbus_grant_ring(dev,
+ (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
if (error) {
free(sring, M_DEVBUF);
info->ring.sring = NULL;
@@ -471,11 +482,11 @@ setup_blkring(device_t dev, struct blkfr
}
error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
- "xbd", (driver_intr_t *)blkif_int, info,
- INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
+ "xbd", (driver_intr_t *)blkif_int, info,
+ INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
if (error) {
xenbus_dev_fatal(dev, error,
- "bind_evtchn_to_irqhandler failed");
+ "bind_evtchn_to_irqhandler failed");
goto fail;
}
@@ -494,7 +505,7 @@ blkfront_backend_changed(device_t dev, X
{
struct blkfront_info *info = device_get_softc(dev);
- DPRINTK("blkfront:backend_changed.\n");
+ DPRINTK("backend_state=%d\n", backend_state);
switch (backend_state) {
case XenbusStateUnknown:
@@ -707,7 +718,7 @@ blkif_open(struct disk *dp)
struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
if (sc == NULL) {
- printk("xb%d: not found", sc->xb_unit);
+ printf("xb%d: not found", sc->xb_unit);
return (ENXIO);
}
@@ -1019,9 +1030,11 @@ blkif_recover(struct blkfront_info *info
blkif_request_t *req;
struct blk_shadow *copy;
+ if (!info->sc)
+ return;
+
/* Stage 1: Make a safe copy of the shadow state. */
copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
- PANIC_IF(copy == NULL);
memcpy(copy, info->shadow, sizeof(info->shadow));
/* Stage 2: Set up free list. */
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all mailing list