svn commit: r305502 - in head: share/man/man9 sys/amd64/vmm/io sys/dev/pci
John Baldwin
jhb at FreeBSD.org
Tue Sep 6 21:15:37 UTC 2016
Author: jhb
Date: Tue Sep 6 21:15:35 2016
New Revision: 305502
URL: https://svnweb.freebsd.org/changeset/base/305502
Log:
Reset PCI pass through devices via PCI-e FLR during VM start and end.
Add routines to trigger a function level reset (FLR) of a PCI-express
device via the PCI-express device control register. This also includes
support routines to wait for pending transactions to complete as well
as calculating the maximum completion timeout permitted by a device.
Change the ppt(4) driver to reset pass through devices before attaching
to a VM during startup and before detaching from a VM during shutdown.
Reviewed by: imp, wblock (earlier version)
MFC after: 1 month
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D7751
Modified:
head/share/man/man9/Makefile
head/share/man/man9/pci.9
head/sys/amd64/vmm/io/ppt.c
head/sys/dev/pci/pci.c
head/sys/dev/pci/pcireg.h
head/sys/dev/pci/pcivar.h
Modified: head/share/man/man9/Makefile
==============================================================================
--- head/share/man/man9/Makefile Tue Sep 6 21:01:38 2016 (r305501)
+++ head/share/man/man9/Makefile Tue Sep 6 21:15:35 2016 (r305502)
@@ -1354,7 +1354,10 @@ MLINKS+=pci.9 pci_alloc_msi.9 \
pci.9 pci_set_max_read_req.9 \
pci.9 pci_write_config.9 \
pci.9 pcie_adjust_config.9 \
+ pci.9 pcie_flr.9 \
+ pci.9 pcie_max_completion_timeout.9 \
pci.9 pcie_read_config.9 \
+ pci.9 pcie_wait_for_pending_transactions.9 \
pci.9 pcie_write_config.9
MLINKS+=pci_iov_schema.9 pci_iov_schema_alloc_node.9 \
pci_iov_schema.9 pci_iov_schema_add_bool.9 \
Modified: head/share/man/man9/pci.9
==============================================================================
--- head/share/man/man9/pci.9 Tue Sep 6 21:01:38 2016 (r305501)
+++ head/share/man/man9/pci.9 Tue Sep 6 21:15:35 2016 (r305502)
@@ -66,7 +66,10 @@
.Nm pci_set_powerstate ,
.Nm pci_write_config ,
.Nm pcie_adjust_config ,
+.Nm pcie_flr ,
+.Nm pcie_get_max_completion_timeout ,
.Nm pcie_read_config ,
+.Nm pcie_wait_for_pending_transactions ,
.Nm pcie_write_config
.Nd PCI bus interface
.Sh SYNOPSIS
@@ -145,8 +148,14 @@
.Fa "uint32_t val"
.Fa "int width"
.Fc
+.Ft bool
+.Fn pcie_flr "device_t dev" "u_int max_delay" "bool force"
+.Ft int
+.Fn pcie_get_max_completion_timeout "device_t dev"
.Ft uint32_t
.Fn pcie_read_config "device_t dev" "int reg" "int width"
+.Ft bool
+.Fn pcie_wait_for_pending_transactions "device_t dev" "u_int max_delay"
.Ft void
.Fn pcie_write_config "device_t dev" "int reg" "uint32_t val" "int width"
.Ft void
@@ -431,6 +440,51 @@ keyword,
then
.Fn pci_get_vpd_readonly
returns an error.
+.Pp
+The
+.Fn pcie_get_max_completion_timeout
+function returns the maximum completion timeout configured for the device
+.Fa dev
+in microseconds.
+If the
+.Fa dev
+device is not a PCI-express device,
+.Fn pcie_get_max_completion_timeout
+returns zero.
+When completion timeouts are disabled for
+.Fa dev ,
+this function returns the maximum timeout that would be used if timeouts
+were enabled.
+.Pp
+The
+.Fn pcie_wait_for_pending_transactions
+function waits for any pending transactions initiated by the
+.Fa dev
+device to complete.
+The function checks for pending transactions by polling the transactions
+pending flag in the PCI-express device status register.
+It returns
+.Dv true
+once the transaction pending flag is clear.
+If transactions are still pending after
+.Fa max_delay
+milliseconds,
+.Fn pcie_wait_for_pending_transactions
+returns
+.Dv false .
+If
+.Fa max_delay
+is set to zero,
+.Fn pcie_wait_for_pending_transactions
+performs a single check;
+otherwise,
+this function may sleep while polling the transactions pending flag.
+.Fn pcie_wait_for_pending_transactions
+returns
+.Dv true
+if
+.Fa dev
+is not a PCI-express device.
.Ss Device Configuration
The
.Fn pci_enable_busmaster
@@ -662,6 +716,51 @@ is invoked,
then the device will be transitioned to
.Dv PCI_POWERSTATE_D0
before any config registers are restored.
+.Pp
+The
+.Fn pcie_flr
+function requests a Function Level Reset
+.Pq FLR
+of
+.Fa dev .
+If
+.Fa dev
+is not a PCI-express device or does not support Function Level Resets via
+the PCI-express device control register,
+.Dv false
+is returned.
+Pending transactions are drained by disabling busmastering and calling
+.Fn pcie_wait_for_pending_transactions
+before resetting the device.
+The
+.Fa max_delay
+argument specifies the maximum timeout to wait for pending transactions as
+described for
+.Fn pcie_wait_for_pending_transactions .
+If
+.Fn pcie_wait_for_pending_transactions
+fails with a timeout and
+.Fa force
+is
+.Dv false ,
+busmastering is re-enabled and
+.Dv false
+is returned.
+If
+.Fn pcie_wait_for_pending_transactions
+fails with a timeout and
+.Fa force
+is
+.Dv true ,
+the device is reset despite the timeout.
+After the reset has been requested,
+.Fn pcie_flr
+sleeps for at least 100 milliseconds before returning
+.Dv true .
+Note that
+.Fn pcie_flr
+does not save and restore any state around the reset.
+The caller should save and restore state as needed.
.Ss Message Signaled Interrupts
Message Signaled Interrupts
.Pq MSI
Modified: head/sys/amd64/vmm/io/ppt.c
==============================================================================
--- head/sys/amd64/vmm/io/ppt.c Tue Sep 6 21:01:38 2016 (r305501)
+++ head/sys/amd64/vmm/io/ppt.c Tue Sep 6 21:15:35 2016 (r305502)
@@ -362,6 +362,11 @@ ppt_assign_device(struct vm *vm, int bus
if (ppt->vm != NULL && ppt->vm != vm)
return (EBUSY);
+ pci_save_state(ppt->dev);
+ pcie_flr(ppt->dev,
+ max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
+ true);
+ pci_restore_state(ppt->dev);
ppt->vm = vm;
iommu_remove_device(iommu_host_domain(), pci_get_rid(ppt->dev));
iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
@@ -382,6 +387,12 @@ ppt_unassign_device(struct vm *vm, int b
*/
if (ppt->vm != vm)
return (EBUSY);
+
+ pci_save_state(ppt->dev);
+ pcie_flr(ppt->dev,
+ max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
+ true);
+ pci_restore_state(ppt->dev);
ppt_unmap_mmio(vm, ppt);
ppt_teardown_msi(ppt);
ppt_teardown_msix(ppt);
Modified: head/sys/dev/pci/pci.c
==============================================================================
--- head/sys/dev/pci/pci.c Tue Sep 6 21:01:38 2016 (r305501)
+++ head/sys/dev/pci/pci.c Tue Sep 6 21:15:35 2016 (r305502)
@@ -5892,3 +5892,165 @@ pci_find_pcie_root_port(device_t dev)
dev = pcib;
}
}
+
+/*
+ * Wait for pending transactions to complete on a PCI-express function.
+ *
+ * The maximum delay is specified in milliseconds in max_delay; a
+ * max_delay of zero performs a single check. This function may sleep.
+ *
+ * Returns true if the function is idle and false if the timeout is
+ * exceeded. If dev is not a PCI-express function, this returns true.
+ */
+bool
+pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ uint16_t sta;
+ int cap;
+
+ cap = dinfo->cfg.pcie.pcie_location;
+ if (cap == 0)
+ return (true);
+
+ sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
+ while (sta & PCIEM_STA_TRANSACTION_PND) {
+ if (max_delay == 0)
+ return (false);
+
+ /* Poll once every 100 milliseconds up to the timeout. */
+ if (max_delay > 100) {
+ pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
+ max_delay -= 100;
+ } else {
+ pause_sbt("pcietp", max_delay * SBT_1MS, 0,
+ C_HARDCLOCK);
+ max_delay = 0;
+ }
+ sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
+ }
+
+ return (true);
+}
+
+/*
+ * Determine the maximum Completion Timeout of dev in microseconds.
+ * The timeout disable bit in Device Control 2 is not consulted.
+ * For non-PCI-express functions this returns 0.
+ */
+int
+pcie_get_max_completion_timeout(device_t dev)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ int cap;
+
+ cap = dinfo->cfg.pcie.pcie_location;
+ if (cap == 0)
+ return (0);
+
+ /*
+ * Functions using the 1.x spec use the default timeout range of
+ * 50 microseconds to 50 milliseconds. Functions that do not
+ * support programmable timeouts also use this range.
+ */
+ if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
+ (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
+ PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
+ return (50 * 1000);
+
+ switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
+ PCIEM_CTL2_COMP_TIMO_VAL) {
+ case PCIEM_CTL2_COMP_TIMO_100US:
+ return (100);
+ case PCIEM_CTL2_COMP_TIMO_10MS:
+ return (10 * 1000);
+ case PCIEM_CTL2_COMP_TIMO_55MS:
+ return (55 * 1000);
+ case PCIEM_CTL2_COMP_TIMO_210MS:
+ return (210 * 1000);
+ case PCIEM_CTL2_COMP_TIMO_900MS:
+ return (900 * 1000);
+ case PCIEM_CTL2_COMP_TIMO_3500MS:
+ return (3500 * 1000);
+ case PCIEM_CTL2_COMP_TIMO_13S:
+ return (13 * 1000 * 1000);
+ case PCIEM_CTL2_COMP_TIMO_64S:
+ return (64 * 1000 * 1000);
+ default: /* the 50MS encoding (0) and any value not listed above */
+ return (50 * 1000);
+ }
+}
+
+/*
+ * Perform a Function Level Reset (FLR) on a device.
+ *
+ * This function first waits for any pending transactions to complete
+ * within the timeout specified by max_delay (in milliseconds). If
+ * transactions are still pending, the function returns false without
+ * attempting a reset unless force is true.
+ *
+ * If dev is not a PCI-express function or does not support FLR, this
+ * function returns false. Otherwise it returns true after the reset.
+ *
+ * Note that no registers are saved or restored. The caller is
+ * responsible for saving and restoring any registers including
+ * PCI-standard registers via pci_save_state() and
+ * pci_restore_state().
+ */
+bool
+pcie_flr(device_t dev, u_int max_delay, bool force)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ uint16_t cmd, ctl;
+ int compl_delay;
+ int cap;
+
+ cap = dinfo->cfg.pcie.pcie_location;
+ if (cap == 0)
+ return (false);
+
+ if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
+ return (false);
+
+ /*
+ * Disable busmastering to prevent generation of new
+ * transactions while waiting for the device to go idle. If
+ * the idle timeout fails and force is false, the command
+ * register is restored which will re-enable busmastering.
+ */
+ cmd = pci_read_config(dev, PCIR_COMMAND, 2);
+ pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
+ if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
+ if (!force) {
+ pci_write_config(dev, PCIR_COMMAND, cmd, 2);
+ return (false);
+ }
+ pci_printf(&dinfo->cfg,
+ "Resetting with transactions pending after %u ms\n",
+ max_delay);
+
+ /*
+ * Extend the post-FLR delay to cover the maximum
+ * Completion Timeout delay of anything in flight
+ * during the FLR delay. Enforce a minimum delay of
+ * at least 10ms.
+ */
+ compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
+ if (compl_delay < 10)
+ compl_delay = 10;
+ } else
+ compl_delay = 0;
+
+ /* Initiate the reset. */
+ ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
+ pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
+ PCIEM_CTL_INITIATE_FLR, 2);
+
+ /* Wait 100ms, plus compl_delay if the reset was forced. */
+ pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);
+
+ if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
+ PCIEM_STA_TRANSACTION_PND)
+ pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
+ return (true);
+}
Modified: head/sys/dev/pci/pcireg.h
==============================================================================
--- head/sys/dev/pci/pcireg.h Tue Sep 6 21:01:38 2016 (r305501)
+++ head/sys/dev/pci/pcireg.h Tue Sep 6 21:15:35 2016 (r305502)
@@ -885,10 +885,25 @@
#define PCIEM_ROOT_STA_PME_STATUS 0x00010000
#define PCIEM_ROOT_STA_PME_PEND 0x00020000
#define PCIER_DEVICE_CAP2 0x24
-#define PCIEM_CAP2_ARI 0x20
+#define PCIEM_CAP2_COMP_TIMO_RANGES 0x0000000f
+#define PCIEM_CAP2_COMP_TIMO_RANGE_A 0x00000001
+#define PCIEM_CAP2_COMP_TIMO_RANGE_B 0x00000002
+#define PCIEM_CAP2_COMP_TIMO_RANGE_C 0x00000004
+#define PCIEM_CAP2_COMP_TIMO_RANGE_D 0x00000008
+#define PCIEM_CAP2_COMP_TIMO_DISABLE 0x00000010
+#define PCIEM_CAP2_ARI 0x00000020
#define PCIER_DEVICE_CTL2 0x28
-#define PCIEM_CTL2_COMP_TIMEOUT_VAL 0x000f
-#define PCIEM_CTL2_COMP_TIMEOUT_DIS 0x0010
+#define PCIEM_CTL2_COMP_TIMO_VAL 0x000f
+#define PCIEM_CTL2_COMP_TIMO_50MS 0x0000
+#define PCIEM_CTL2_COMP_TIMO_100US 0x0001
+#define PCIEM_CTL2_COMP_TIMO_10MS 0x0002
+#define PCIEM_CTL2_COMP_TIMO_55MS 0x0005
+#define PCIEM_CTL2_COMP_TIMO_210MS 0x0006
+#define PCIEM_CTL2_COMP_TIMO_900MS 0x0009
+#define PCIEM_CTL2_COMP_TIMO_3500MS 0x000a
+#define PCIEM_CTL2_COMP_TIMO_13S 0x000d
+#define PCIEM_CTL2_COMP_TIMO_64S 0x000e
+#define PCIEM_CTL2_COMP_TIMO_DISABLE 0x0010
#define PCIEM_CTL2_ARI 0x0020
#define PCIEM_CTL2_ATOMIC_REQ_ENABLE 0x0040
#define PCIEM_CTL2_ATOMIC_EGR_BLOCK 0x0080
Modified: head/sys/dev/pci/pcivar.h
==============================================================================
--- head/sys/dev/pci/pcivar.h Tue Sep 6 21:01:38 2016 (r305501)
+++ head/sys/dev/pci/pcivar.h Tue Sep 6 21:15:35 2016 (r305502)
@@ -595,7 +595,9 @@ uint32_t pcie_read_config(device_t dev,
void pcie_write_config(device_t dev, int reg, uint32_t value, int width);
uint32_t pcie_adjust_config(device_t dev, int reg, uint32_t mask,
uint32_t value, int width);
-
+bool pcie_flr(device_t dev, u_int max_delay, bool force);
+int pcie_get_max_completion_timeout(device_t dev);
+bool pcie_wait_for_pending_transactions(device_t dev, u_int max_delay);
#ifdef BUS_SPACE_MAXADDR
#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
More information about the svn-src-all
mailing list