git: 987fca954dc8 - stable/13 - md: Get rid of the pbuf zone
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 23 Jun 2023 13:53:38 UTC
The branch stable/13 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=987fca954dc805e5135c13b6efa28a8174d35377

commit 987fca954dc805e5135c13b6efa28a8174d35377
Author: Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2023-05-23 14:14:06 +0000
Commit: Mark Johnston <markj@FreeBSD.org>
CommitDate: 2023-06-23 13:33:49 +0000

md: Get rid of the pbuf zone

The zone is used solely to provide KVA for mapping BIOs so that we can pass mapped buffers to VOP_READ and VOP_WRITE. Currently we preallocate nswbuf/10 bufs for this purpose during boot. The intent was to limit KVA usage on 32-bit systems, but the preallocation means that we in fact consume more KVA than needed unless one has more than nswbuf/10 (typically 25) vnode-backed MD devices in existence, which I would argue is the uncommon case.

Meanwhile, all I/O to an MD is handled by a dedicated thread, so we can instead simply preallocate the KVA region at MD device creation time.

Event: BSDCan 2023
Reviewed by: kib
MFC after: 1 month
Differential Revision: https://reviews.freebsd.org/D40215

(cherry picked from commit 30038a8b4efc6d0b9f8f295e28bc205fe9728310)
---
sys/dev/md/md.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index 2e941c991ddb..52d64d6721cd 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -97,6 +97,7 @@ #include <geom/geom_int.h> #include <vm/vm.h> +#include <vm/vm_extern.h> #include <vm/vm_param.h> #include <vm/vm_object.h> #include <vm/vm_page.h> @@ -231,8 +232,6 @@ static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list); #define NMASK (NINDIR-1) static int nshift; -static uma_zone_t md_pbuf_zone; - struct indir { uintptr_t *array; u_int total; @@ -274,6 +273,7 @@ struct md_s { char file[PATH_MAX]; char label[PATH_MAX]; struct ucred *cred; + vm_offset_t kva; /* MD_SWAP related fields */ vm_object_t object; @@ -875,11 +875,11 @@ mdstart_vnode(struct md_s *sc, struct bio 
*bp) struct iovec *piov; struct mount *mp; struct vnode *vp; - struct buf *pb; bus_dma_segment_t *vlist; struct thread *td; off_t iolen, iostart, len, zerosize; int ma_offs, npages; + bool mapped; switch (bp->bio_cmd) { case BIO_READ: @@ -897,10 +897,10 @@ mdstart_vnode(struct md_s *sc, struct bio *bp) td = curthread; vp = sc->vnode; - pb = NULL; piov = NULL; ma_offs = bp->bio_ma_offset; len = bp->bio_length; + mapped = false; /* * VNODE I/O @@ -962,22 +962,21 @@ mdstart_vnode(struct md_s *sc, struct bio *bp) auio.uio_iovcnt = piov - auio.uio_iov; piov = auio.uio_iov; } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { - pb = uma_zalloc(md_pbuf_zone, M_WAITOK); - MPASS((pb->b_flags & B_MAXPHYS) != 0); bp->bio_resid = len; unmapped_step: npages = atop(min(maxphys, round_page(len + (ma_offs & PAGE_MASK)))); iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len); KASSERT(iolen > 0, ("zero iolen")); - pmap_qenter((vm_offset_t)pb->b_data, - &bp->bio_ma[atop(ma_offs)], npages); - aiov.iov_base = (void *)((vm_offset_t)pb->b_data + - (ma_offs & PAGE_MASK)); + KASSERT(npages <= atop(MAXPHYS + PAGE_SIZE), + ("npages %d too large", npages)); + pmap_qenter(sc->kva, &bp->bio_ma[atop(ma_offs)], npages); + aiov.iov_base = (void *)(sc->kva + (ma_offs & PAGE_MASK)); aiov.iov_len = iolen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_resid = iolen; + mapped = true; } else { aiov.iov_base = bp->bio_data; aiov.iov_len = bp->bio_length; @@ -1005,8 +1004,8 @@ unmapped_step: VOP_ADVISE(vp, iostart, auio.uio_offset - 1, POSIX_FADV_DONTNEED); - if (pb != NULL) { - pmap_qremove((vm_offset_t)pb->b_data, npages); + if (mapped) { + pmap_qremove(sc->kva, npages); if (error == 0) { len -= iolen; bp->bio_resid -= iolen; @@ -1014,7 +1013,6 @@ unmapped_step: if (len > 0) goto unmapped_step; } - uma_zfree(md_pbuf_zone, pb); } else { bp->bio_resid = auio.uio_resid; } @@ -1283,7 +1281,7 @@ mdnew(int unit, int *errp, enum md_types type) return (NULL); } - sc = (struct md_s *)malloc(sizeof *sc, 
M_MD, M_WAITOK | M_ZERO); + sc = malloc(sizeof(*sc), M_MD, M_WAITOK | M_ZERO); sc->type = type; bioq_init(&sc->bio_queue); mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF); @@ -1483,6 +1481,8 @@ mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td) nd.ni_vp->v_vflag &= ~VV_MD; goto bad; } + + sc->kva = kva_alloc(MAXPHYS + PAGE_SIZE); return (0); bad: VOP_UNLOCK(nd.ni_vp); @@ -1541,6 +1541,8 @@ mddestroy(struct md_s *sc, struct thread *td) destroy_indir(sc, sc->indir); if (sc->uma) uma_zdestroy(sc->uma); + if (sc->kva) + kva_free(sc->kva, MAXPHYS + PAGE_SIZE); LIST_REMOVE(sc, list); free_unr(md_uh, sc->unit); @@ -2074,7 +2076,6 @@ g_md_init(struct g_class *mp __unused) sx_xunlock(&md_sx); } } - md_pbuf_zone = pbuf_zsecond_create("mdpbuf", nswbuf / 10); status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL, 0600, MDCTL_NAME); g_topology_lock(); @@ -2170,6 +2171,5 @@ g_md_fini(struct g_class *mp __unused) sx_destroy(&md_sx); if (status_dev != NULL) destroy_dev(status_dev); - uma_zdestroy(md_pbuf_zone); delete_unrhdr(md_uh); }