NVMe performance 4x slower than expected
Konstantin Belousov
kostikbel at gmail.com
Wed Apr 1 21:23:15 UTC 2015
On Wed, Apr 01, 2015 at 10:52:18PM +0200, Tobias Oberstein wrote:
> > > FreeBSD 11 Current with patches (DMAR and ZFS patches, otherwise the box
> > > doesn't boot at all .. because of 3TB RAM and the amount of periphery).
> >
> > Do you still have WITNESS and INVARIANTS turned on in your kernel
> > config? They're turned on by default for Current, but they do have
> > some performance impact. To turn them off, just build a
> > GENERIC-NODEBUG kernel .
>
> WITNESS is off, INVARIANTS is still on.
INVARIANTS are costly.
>
> Here is complete config:
>
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd_kernel_conf.md
>
> This is the aggregated patch (work was done by Konstantin - thanks again
> btw!)
>
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd_patch.md
>
> > Could you also post full dmesg output as well as vmstat -i?
>
> dmesg:
>
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd_dmesg.md
>
> vmstat:
>
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd_vmstat.md
>
> ===
>
> Here are results from FIO under FreeBSD:
>
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd.md
>
> Here are results using _same_ FIO control file under Linux:
>
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/linux.md
Is this vmstat output taken after the test?
It is somewhat funny that nvme does not use MSI(X).
I have had the following patch for a long time; it allowed me to increase pps
in iperf and similar tests when DMAR is enabled. In your case it could
reduce the rate of DMAR interrupts.
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index a18adcf..b23a4c1 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -586,6 +586,18 @@ dmar_ctx_unload_entry(struct dmar_map_entry *entry, bool free)
}
}
+static struct dmar_qi_genseq *
+dmar_ctx_unload_gseq(struct dmar_ctx *ctx, struct dmar_map_entry *entry,
+ struct dmar_qi_genseq *gseq)
+{
+
+ if (TAILQ_NEXT(entry, dmamap_link) != NULL)
+ return (NULL);
+ if (ctx->batch_no++ % dmar_batch_coalesce != 0)
+ return (NULL);
+ return (gseq);
+}
+
void
dmar_ctx_unload(struct dmar_ctx *ctx, struct dmar_map_entries_tailq *entries,
bool cansleep)
@@ -619,8 +631,7 @@ dmar_ctx_unload(struct dmar_ctx *ctx, struct dmar_map_entries_tailq *entries,
entry->gseq.gen = 0;
entry->gseq.seq = 0;
dmar_qi_invalidate_locked(ctx, entry->start, entry->end -
- entry->start, TAILQ_NEXT(entry, dmamap_link) == NULL ?
- &gseq : NULL);
+ entry->start, dmar_ctx_unload_gseq(ctx, entry, &gseq));
}
TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
entry->gseq = gseq;
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 2865ab5..6e0ab7f 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -93,6 +93,7 @@ struct dmar_ctx {
u_int entries_cnt;
u_long loads;
u_long unloads;
+ u_int batch_no;
struct dmar_gas_entries_tree rb_root;
struct dmar_map_entries_tailq unload_entries; /* Entries to unload */
struct dmar_map_entry *first_place, *last_place;
@@ -339,6 +340,7 @@ extern dmar_haddr_t dmar_high;
extern int haw;
extern int dmar_tbl_pagecnt;
extern int dmar_match_verbose;
+extern int dmar_batch_coalesce;
extern int dmar_check_free;
static inline uint32_t
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
index c239579..e7dc3f9 100644
--- a/sys/x86/iommu/intel_drv.c
+++ b/sys/x86/iommu/intel_drv.c
@@ -153,7 +153,7 @@ dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
return (1);
}
-static int dmar_enable = 0;
+static int dmar_enable = 1;
static void
dmar_identify(driver_t *driver, device_t parent)
{
diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c
index f696f9d..d3c3267 100644
--- a/sys/x86/iommu/intel_utils.c
+++ b/sys/x86/iommu/intel_utils.c
@@ -624,6 +624,7 @@ dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
}
int dmar_match_verbose;
+int dmar_batch_coalesce = 100;
static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL, "");
SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
@@ -632,6 +633,9 @@ SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RWTUN,
&dmar_match_verbose, 0,
"Verbose matching of the PCI devices to DMAR paths");
+SYSCTL_INT(_hw_dmar, OID_AUTO, batch_coalesce, CTLFLAG_RW | CTLFLAG_TUN,
+ &dmar_batch_coalesce, 0,
+ "Number of qi batches between interrupt");
#ifdef INVARIANTS
int dmar_check_free;
SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RWTUN,
More information about the freebsd-hackers
mailing list