PERFORCE change 206989 for review
John Baldwin
jhb at FreeBSD.org
Mon Feb 27 19:00:56 UTC 2012
http://p4web.freebsd.org/@@206989?ac=10
Change 206989 by jhb at jhb_jhbbsd on 2012/02/27 19:00:32
Import my current WIP to implement POSIX_FADV_WILLNEED for UFS.
Affected files ...
.. //depot/projects/fadvise/sys/kern/vfs_bio.c#5 edit
.. //depot/projects/fadvise/sys/kern/vfs_cluster.c#3 edit
.. //depot/projects/fadvise/sys/sys/buf.h#2 edit
.. //depot/projects/fadvise/sys/ufs/ffs/ffs_vnops.c#3 edit
Differences ...
==== //depot/projects/fadvise/sys/kern/vfs_bio.c#5 (text+ko) ====
@@ -2664,8 +2664,10 @@
if (error == ENOLCK)
goto loop;
/* We timed out or were interrupted. */
- else if (error)
+ else if (error) {
+ CTR4(KTR_BUF, "getblk(%p, %ld, %d) failed %d", vp, (long)blkno, size, error);
return (NULL);
+ }
/*
* The buffer is locked. B_CACHE is cleared if the buffer is
@@ -2787,8 +2789,16 @@
bp = getnewbuf(vp, slpflag, slptimeo, size, maxsize, flags);
if (bp == NULL) {
- if (slpflag || slptimeo)
+ /*
+ * XXX: Should this also return NULL if
+ * GB_NOWAIT_BD is set?
+ */
+ if (slpflag || slptimeo) {
+ CTR3(KTR_BUF,
+ "getblk(%p, %ld, %d) failed getnewbuf()",
+ vp, (long)blkno, size);
return NULL;
+ }
goto loop;
}
==== //depot/projects/fadvise/sys/kern/vfs_cluster.c#3 (text+ko) ====
@@ -39,6 +39,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/bio.h>
#include <sys/buf.h>
@@ -64,8 +65,10 @@
cluster_collectbufs(struct vnode *vp, struct buf *last_bp);
static struct buf *
cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
- daddr_t blkno, long size, int run, struct buf *fbp);
+ daddr_t blkno, long size, int run, struct buf *fbp, int gbflags);
static void cluster_callback(struct buf *);
+static void cluster_ra(struct vnode *vp, u_quad_t filesize, daddr_t flbn,
+ daddr_t elbn, long size, int racluster, int gbflags);
static int write_behind = 1;
SYSCTL_INT(_vfs, OID_AUTO, write_behind, CTLFLAG_RW, &write_behind, 0,
@@ -75,6 +78,19 @@
SYSCTL_INT(_vfs, OID_AUTO, read_max, CTLFLAG_RW, &read_max, 0,
"Cluster read-ahead max block count");
+/*
+ * Read-ahead statistics, exported read-only under vfs.cluster.  The
+ * counters are updated with a plain ++ (no atomics), so the values
+ * should be treated as approximate.
+ */
+SYSCTL_NODE(_vfs, OID_AUTO, cluster, CTLFLAG_RD, NULL,
+    "Cluster read-ahead statistics");
+
+/* Bumped in cluster_ra() when getblk() fails for a single block. */
+static int ra_fails;
+SYSCTL_INT(_vfs_cluster, OID_AUTO, ra_fails, CTLFLAG_RD, &ra_fails, 0,
+    "Single-block read-aheads skipped because getblk() failed");
+/* Bumped in cluster_rbuild() when getblk() fails for the first block. */
+static int rbuild_fails;
+SYSCTL_INT(_vfs_cluster, OID_AUTO, rbuild_fails, CTLFLAG_RD, &rbuild_fails, 0,
+    "Cluster builds abandoned because getblk() failed");
+/* Bumped in cluster_ra() for each cluster buffer it goes on to schedule. */
+static int ra_clusters;
+SYSCTL_INT(_vfs_cluster, OID_AUTO, ra_clusters, CTLFLAG_RD, &ra_clusters, 0,
+    "Read-ahead clusters scheduled");
+/* Bumped in cluster_ra() for each single block it goes on to schedule. */
+static int ra_singles;
+SYSCTL_INT(_vfs_cluster, OID_AUTO, ra_singles, CTLFLAG_RD, &ra_singles, 0,
+    "Read-ahead single blocks scheduled");
+
/* Page expended to mark partially backed buffers */
extern vm_page_t bogus_page;
@@ -208,7 +224,7 @@
if (ncontig < nblks)
nblks = ncontig;
bp = cluster_rbuild(vp, filesize, lblkno,
- blkno, size, nblks, bp);
+ blkno, size, nblks, bp, 0);
lblkno += (bp->b_bufsize / size);
} else {
bp->b_flags |= B_RAM;
@@ -236,11 +252,69 @@
/*
* If we have been doing sequential I/O, then do some read-ahead.
*/
- while (lblkno < (origblkno + maxra)) {
+ cluster_ra(vp, filesize, lblkno, origblkno + maxra, size, racluster, 0);
+
+ if (reqbp)
+ return (bufwait(reqbp));
+ else
+ return (error);
+}
+
+/*
+ * Perform asynchronous read-ahead clustering reads for contiguous blocks
+ * if possible, starting at logical block lblkno of vp.
+ *
+ *	vp		vnode to read ahead on
+ *	filesize	size of the file in bytes
+ *	lblkno		first logical block of the read-ahead window
+ *	size		block size in bytes used for the buffers
+ *
+ * Returns the amount of I/O (in bytes) it attempted to schedule.  The
+ * result is never negative; it is zero when lblkno is at or beyond the
+ * last full block of the file, so callers must not assume progress.
+ */
+long
+cluster_readahead(vp, filesize, lblkno, size)
+	struct vnode *vp;
+	u_quad_t filesize;
+	daddr_t lblkno;
+	long size;
+{
+	int maxra, racluster;
+
+	/*
+	 * Try to limit the amount of read-ahead by a few
+	 * ad-hoc parameters.  This needs work!!!
+	 */
+	racluster = vp->v_mount->mnt_iosize_max / size;
+	maxra = min(nbuf/8, read_max);
+	if (((u_quad_t)(lblkno + maxra + 1) * size) > filesize) {
+		/*
+		 * Shrink the window to what is left of the file.  The
+		 * subtraction would go negative for a block at or past
+		 * the last full block, so use an empty window instead.
+		 */
+		if ((u_quad_t)lblkno < filesize / size)
+			maxra = (filesize / size) - lblkno;
+		else
+			maxra = 0;
+	}
+	/* daddr_t is not necessarily long; cast to match the %ld format. */
+	CTR3(KTR_BUF, "cluster_readahead(%p, %ld) using maxra %d", vp,
+	    (long)lblkno, maxra);
+	cluster_ra(vp, filesize, lblkno, lblkno + maxra, size, racluster,
+	    /* GB_NOWAIT_BD | */ GB_LOCK_NOWAIT);
+	return (maxra * size);
+}
+
+static void
+cluster_ra(vp, filesize, flbn, elbn, size, racluster, gbflags)
+ struct vnode *vp;
+ u_quad_t filesize;
+ daddr_t flbn;
+ daddr_t elbn;
+ long size;
+ int racluster;
+ int gbflags;
+{
+ struct buf *rbp;
+ daddr_t blkno, lblkno;
+#ifdef KTR
+ daddr_t old;
+#endif
+ int error, ncontig;
+
+ for (lblkno = flbn; lblkno < elbn; ) {
+#ifdef KTR
+ old = lblkno;
+#endif
error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL);
if (error)
break;
+ CTR4(KTR_BUF, "cluster_ra: VOP_BMAP(%p, %ld) returned %ld, %d",
+ vp, lblkno, blkno, ncontig);
if (blkno == -1)
break;
@@ -252,22 +326,46 @@
if (ncontig) {
ncontig = min(ncontig + 1, racluster);
rbp = cluster_rbuild(vp, filesize, lblkno, blkno,
- size, ncontig, NULL);
+ size, ncontig, NULL, gbflags);
+ if (rbp == NULL) {
+ CTR2(KTR_BUF, "cluster_rbuild(%p, %ld) failed",
+ vp, lblkno);
+ lblkno += 1;
+ continue;
+ }
lblkno += (rbp->b_bufsize / size);
if (rbp->b_flags & B_DELWRI) {
+ CTR2(KTR_BUF,
+ "cluster_ra: cluster for %ld,%d has B_DELWRI",
+ old, rbp->b_bufsize / size);
bqrelse(rbp);
continue;
}
+ CTR2(KTR_BUF,
+ "cluster_ra: scheduling cluster %ld,%d",
+ old, rbp->b_bufsize / size);
+ ra_clusters++;
} else {
- rbp = getblk(vp, lblkno, size, 0, 0, 0);
+ rbp = getblk(vp, lblkno, size, 0, 0, gbflags);
lblkno += 1;
+ if (rbp == NULL) {
+ CTR2(KTR_BUF,
+ "cluster_ra: getblk(%p, %ld) failed",
+ vp, lblkno);
+ ra_fails++;
+ continue;
+ }
if (rbp->b_flags & B_DELWRI) {
+ CTR1(KTR_BUF,
+ "cluster_ra: block %ld has B_DELWRI", old);
bqrelse(rbp);
continue;
}
rbp->b_flags |= B_ASYNC | B_RAM;
rbp->b_iocmd = BIO_READ;
rbp->b_blkno = blkno;
+ CTR1(KTR_BUF, "cluster_ra: scheduling block %ld", old);
+ ra_singles++;
}
if (rbp->b_flags & B_CACHE) {
rbp->b_flags &= ~B_ASYNC;
@@ -285,11 +383,6 @@
bstrategy(rbp);
curthread->td_ru.ru_inblock++;
}
-
- if (reqbp)
- return (bufwait(reqbp));
- else
- return (error);
}
/*
@@ -298,7 +391,7 @@
* and then parcel them up into logical blocks in the buffer hash table.
*/
static struct buf *
-cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
+cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp, gbflags)
struct vnode *vp;
u_quad_t filesize;
daddr_t lbn;
@@ -306,6 +399,7 @@
long size;
int run;
struct buf *fbp;
+ int gbflags;
{
struct bufobj *bo;
struct buf *bp, *tbp;
@@ -329,8 +423,10 @@
tbp = fbp;
tbp->b_iocmd = BIO_READ;
} else {
- tbp = getblk(vp, lbn, size, 0, 0, 0);
- if (tbp->b_flags & B_CACHE)
+ tbp = getblk(vp, lbn, size, 0, 0, gbflags);
+ if (tbp == NULL)
+ rbuild_fails++;
+ if (tbp == NULL || tbp->b_flags & B_CACHE)
return tbp;
tbp->b_flags |= B_ASYNC | B_RAM;
tbp->b_iocmd = BIO_READ;
==== //depot/projects/fadvise/sys/sys/buf.h#2 (text+ko) ====
@@ -504,6 +504,7 @@
int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
struct ucred *, long, int, struct buf **);
+long cluster_readahead(struct vnode *, u_quad_t, daddr_t, long);
int cluster_wbuild(struct vnode *, long, daddr_t, int);
void cluster_write(struct vnode *, struct buf *, u_quad_t, int);
void vfs_bio_set_valid(struct buf *, int base, int size);
==== //depot/projects/fadvise/sys/ufs/ffs/ffs_vnops.c#3 (text+ko) ====
@@ -70,6 +70,7 @@
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
+#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
@@ -100,6 +101,7 @@
#ifdef DIRECTIO
extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
+static vop_advise_t ffs_advise;
static vop_fsync_t ffs_fsync;
static vop_lock1_t ffs_lock;
static vop_getpages_t ffs_getpages;
@@ -124,6 +126,7 @@
.vop_fsync = ffs_fsync,
.vop_getpages = ffs_getpages,
.vop_lock1 = ffs_lock,
+ .vop_advise = ffs_advise,
.vop_read = ffs_read,
.vop_reallocblks = ffs_reallocblks,
.vop_write = ffs_write,
@@ -143,6 +146,7 @@
.vop_fsync = ffs_fsync,
.vop_getpages = ffs_getpages,
.vop_lock1 = ffs_lock,
+ .vop_advise = ffs_advise,
.vop_read = ffs_read,
.vop_reallocblks = ffs_reallocblks,
.vop_write = ffs_write,
@@ -399,6 +403,78 @@
#endif
}
+/*
+ * Vnode op for file access advice.
+ *
+ * POSIX_FADV_WILLNEED is handled here by scheduling read-ahead for the
+ * byte range [a_start, a_end] (inclusive), clipped to the current file
+ * size.  Every other advice value is passed on to vop_stdadvise().
+ *
+ * Returns 0 for POSIX_FADV_WILLNEED (including when the range lies
+ * entirely past the end of the file), EBADF if the vnode turns out to
+ * be doomed once locked, or the result of vop_stdadvise() otherwise.
+ */
+static int
+ffs_advise(ap)
+	struct vop_advise_args /* {
+		struct vnode *a_vp;
+		off_t a_start;
+		off_t a_end;
+		int a_advice;
+	} */ *ap;
+{
+	struct vnode *vp;
+	struct inode *ip;
+	struct fs *fs;
+	off_t start, end;
+	size_t resid;
+	ufs_lbn_t lbn, endblkno;
+	long size, blkoffset, xferlen;
+	int xfersize;
+
+	switch (ap->a_advice) {
+	case POSIX_FADV_WILLNEED:
+		vp = ap->a_vp;
+		start = ap->a_start;
+		end = ap->a_end;
+		vn_lock(vp, LK_SHARED | LK_RETRY);
+		if (vp->v_iflag & VI_DOOMED) {
+			VOP_UNLOCK(vp, 0);
+			return (EBADF);
+		}
+		KASSERT(vp->v_type == VREG, ("FADV_WILLNEED on bad vnode"));
+		ip = VTOI(vp);
+
+		/* Nothing to prefetch if the range starts at or after EOF. */
+		if (start >= ip->i_size) {
+			VOP_UNLOCK(vp, 0);
+			return (0);
+		}
+		if (end >= ip->i_size)
+			end = ip->i_size - 1;
+		resid = end - start + 1;
+		fs = ip->i_fs;
+
+		/* HACK: Prefetch indirect blocks for this range. */
+		endblkno = lblkno(fs, end);
+		for (lbn = NDADDR; lbn < endblkno; lbn += NINDIR(fs))
+			breada(vp, &lbn, &fs->fs_bsize, 1, NOCRED);
+
+		while (resid > 0) {
+			/* Limit the number of read ahead buffers. */
+			if (runningbufspace > hibufspace / 2)
+				break;
+			lbn = lblkno(fs, start);
+			size = blksize(fs, ip, lbn);
+			blkoffset = blkoff(fs, start);
+			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+				/*
+				 * Clustered read-ahead is issued per block,
+				 * so back up to the start of this block.
+				 */
+				resid += blkoffset;
+				start -= blkoffset;
+				xferlen = cluster_readahead(vp, ip->i_size,
+				    lbn, size);
+			} else {
+				xfersize = fs->fs_bsize - blkoffset;
+				if (resid < xfersize)
+					xfersize = resid;
+				breada(vp, &lbn, &xfersize, 1, NOCRED);
+				xferlen = xfersize;
+			}
+
+			/*
+			 * Stop if no forward progress was made, which would
+			 * otherwise spin here with the vnode locked, or if
+			 * this pass covered the rest of the range.  resid is
+			 * unsigned, so subtracting a larger xferlen would
+			 * wrap around instead of ending the loop.
+			 */
+			if (xferlen <= 0 || (size_t)xferlen >= resid)
+				break;
+			resid -= xferlen;
+			start += xferlen;
+		}
+		VOP_UNLOCK(vp, 0);
+		return (0);
+	default:
+		return (vop_stdadvise(ap));
+	}
+}
+
/*
* Vnode op for reading.
*/
More information about the p4-projects
mailing list