svn commit: r191813 - in stable/7/sys: . contrib/pf dev/ath/ath_hal
dev/cxgb gnu/fs/xfs/FreeBSD kern sys ufs/ffs
Konstantin Belousov
kib at FreeBSD.org
Tue May 5 10:34:44 UTC 2009
Author: kib
Date: Tue May 5 10:34:43 2009
New Revision: 191813
URL: http://svn.freebsd.org/changeset/base/191813
Log:
MFC r189878:
Fix two issues with bufdaemon, often causing the processes to hang in
the "nbufkv" sleep.
Do not retry the request for a new block from ffs_bufwrite() when the write
is done from bufdaemon and there is a buffer shortage.
In getnewbuf(), help bufdaemon to flush dirty buffers owned by the vnode
locked by curthread.
For the MFC, the default value of the sysctl vfs.flushbufqtarget is set to -1,
disabling the helpers. The TDP_BUFNEED flag value from HEAD conflicts
with the TDP_UPCALLING KSE bit, so it is moved to the end of the allocated bits.
Modified:
stable/7/sys/ (props changed)
stable/7/sys/contrib/pf/ (props changed)
stable/7/sys/dev/ath/ath_hal/ (props changed)
stable/7/sys/dev/cxgb/ (props changed)
stable/7/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c
stable/7/sys/kern/vfs_bio.c
stable/7/sys/sys/buf.h
stable/7/sys/sys/proc.h
stable/7/sys/ufs/ffs/ffs_vfsops.c
Modified: stable/7/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c
==============================================================================
--- stable/7/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c Tue May 5 09:24:20 2009 (r191812)
+++ stable/7/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c Tue May 5 10:34:43 2009 (r191813)
@@ -81,7 +81,7 @@ xfs_buf_get_empty(size_t size, xfs_buft
{
struct buf *bp;
- bp = geteblk(0);
+ bp = geteblk(0, 0);
if (bp != NULL) {
bp->b_bufsize = size;
bp->b_bcount = size;
@@ -101,7 +101,7 @@ xfs_buf_get_noaddr(size_t len, xfs_bufta
if (len >= MAXPHYS)
return (NULL);
- bp = geteblk(len);
+ bp = geteblk(len, 0);
if (bp != NULL) {
KASSERT(BUF_REFCNT(bp) == 1,
("xfs_buf_get_empty: bp %p not locked",bp));
Modified: stable/7/sys/kern/vfs_bio.c
==============================================================================
--- stable/7/sys/kern/vfs_bio.c Tue May 5 09:24:20 2009 (r191812)
+++ stable/7/sys/kern/vfs_bio.c Tue May 5 10:34:43 2009 (r191813)
@@ -105,7 +105,8 @@ static void vfs_setdirty_locked_object(s
static void vfs_vmio_release(struct buf *bp);
static int vfs_bio_clcheck(struct vnode *vp, int size,
daddr_t lblkno, daddr_t blkno);
-static int flushbufqueues(int, int);
+static int buf_do_flush(struct vnode *vp);
+static int flushbufqueues(struct vnode *, int, int);
static void buf_daemon(void);
static void bremfreel(struct buf *bp);
@@ -187,6 +188,9 @@ SYSCTL_INT(_vfs, OID_AUTO, getnewbufcall
static int getnewbufrestarts;
SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0,
"Number of times getnewbuf has had to restart a buffer aquisition");
+static int flushbufqtarget = -1;
+SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0,
+ "Amount of work to do in flushbufqueues when helping bufdaemon");
/*
* Wakeup point for bufdaemon, as well as indicator of whether it is already
@@ -258,6 +262,7 @@ static struct mtx bpinlock;
#define QUEUE_DIRTY_GIANT 3 /* B_DELWRI buffers that need giant */
#define QUEUE_EMPTYKVA 4 /* empty buffer headers w/KVA assignment */
#define QUEUE_EMPTY 5 /* empty buffer headers */
+#define QUEUE_SENTINEL 1024 /* not an queue index, but mark for sentinel */
/* Queues for free buffers with various properties */
static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } };
@@ -1707,21 +1712,23 @@ vfs_bio_awrite(struct buf *bp)
*/
static struct buf *
-getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
+getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize,
+ int gbflags)
{
+ struct thread *td;
struct buf *bp;
struct buf *nbp;
int defrag = 0;
int nqindex;
static int flushingbufs;
+ td = curthread;
/*
* We can't afford to block since we might be holding a vnode lock,
* which may prevent system daemons from running. We deal with
* low-memory situations by proactively returning memory and running
* async I/O rather then sync I/O.
*/
-
atomic_add_int(&getnewbufcalls, 1);
atomic_subtract_int(&getnewbufrestarts, 1);
restart:
@@ -1953,8 +1960,9 @@ restart:
*/
if (bp == NULL) {
- int flags;
+ int flags, norunbuf;
char *waitmsg;
+ int fl;
if (defrag) {
flags = VFS_BIO_NEED_BUFSPACE;
@@ -1972,9 +1980,35 @@ restart:
mtx_unlock(&bqlock);
bd_speedup(); /* heeeelp */
+ if (gbflags & GB_NOWAIT_BD)
+ return (NULL);
mtx_lock(&nblock);
while (needsbuffer & flags) {
+ if (vp != NULL && (td->td_pflags & TDP_BUFNEED) == 0) {
+ mtx_unlock(&nblock);
+ /*
+ * getblk() is called with a vnode
+ * locked, and some majority of the
+ * dirty buffers may as well belong to
+ * the vnode. Flushing the buffers
+ * there would make a progress that
+ * cannot be achieved by the
+ * buf_daemon, that cannot lock the
+ * vnode.
+ */
+ norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) |
+ (td->td_pflags & TDP_NORUNNINGBUF);
+ /* play bufdaemon */
+ td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
+ fl = buf_do_flush(vp);
+ td->td_pflags &= norunbuf;
+ mtx_lock(&nblock);
+ if (fl != 0)
+ continue;
+ if ((needsbuffer & flags) == 0)
+ break;
+ }
if (msleep(&needsbuffer, &nblock,
(PRIBIO + 4) | slpflag, waitmsg, slptimeo)) {
mtx_unlock(&nblock);
@@ -2043,6 +2077,35 @@ static struct kproc_desc buf_kp = {
};
SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);
+static int
+buf_do_flush(struct vnode *vp)
+{
+ int flushed;
+
+ flushed = flushbufqueues(vp, QUEUE_DIRTY, 0);
+ /* The list empty check here is slightly racy */
+ if (!TAILQ_EMPTY(&bufqueues[QUEUE_DIRTY_GIANT])) {
+ mtx_lock(&Giant);
+ flushed += flushbufqueues(vp, QUEUE_DIRTY_GIANT, 0);
+ mtx_unlock(&Giant);
+ }
+ if (flushed == 0) {
+ /*
+ * Could not find any buffers without rollback
+ * dependencies, so just write the first one
+ * in the hopes of eventually making progress.
+ */
+ flushbufqueues(vp, QUEUE_DIRTY, 1);
+ if (!TAILQ_EMPTY(
+ &bufqueues[QUEUE_DIRTY_GIANT])) {
+ mtx_lock(&Giant);
+ flushbufqueues(vp, QUEUE_DIRTY_GIANT, 1);
+ mtx_unlock(&Giant);
+ }
+ }
+ return (flushed);
+}
+
static void
buf_daemon()
{
@@ -2056,7 +2119,7 @@ buf_daemon()
/*
* This process is allowed to take the buffer cache to the limit
*/
- curthread->td_pflags |= TDP_NORUNNINGBUF;
+ curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED;
mtx_lock(&bdlock);
for (;;) {
bd_request = 0;
@@ -2071,30 +2134,8 @@ buf_daemon()
* normally would so they can run in parallel with our drain.
*/
while (numdirtybuffers > lodirtybuffers) {
- int flushed;
-
- flushed = flushbufqueues(QUEUE_DIRTY, 0);
- /* The list empty check here is slightly racy */
- if (!TAILQ_EMPTY(&bufqueues[QUEUE_DIRTY_GIANT])) {
- mtx_lock(&Giant);
- flushed += flushbufqueues(QUEUE_DIRTY_GIANT, 0);
- mtx_unlock(&Giant);
- }
- if (flushed == 0) {
- /*
- * Could not find any buffers without rollback
- * dependencies, so just write the first one
- * in the hopes of eventually making progress.
- */
- flushbufqueues(QUEUE_DIRTY, 1);
- if (!TAILQ_EMPTY(
- &bufqueues[QUEUE_DIRTY_GIANT])) {
- mtx_lock(&Giant);
- flushbufqueues(QUEUE_DIRTY_GIANT, 1);
- mtx_unlock(&Giant);
- }
+ if (buf_do_flush(NULL) == 0)
break;
- }
uio_yield();
}
@@ -2140,7 +2181,7 @@ SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps
0, "Number of buffers flushed with dependecies that require rollbacks");
static int
-flushbufqueues(int queue, int flushdeps)
+flushbufqueues(struct vnode *lvp, int queue, int flushdeps)
{
struct thread *td = curthread;
struct buf sentinel;
@@ -2151,20 +2192,37 @@ flushbufqueues(int queue, int flushdeps)
int flushed;
int target;
- target = numdirtybuffers - lodirtybuffers;
- if (flushdeps && target > 2)
- target /= 2;
+ if (lvp == NULL) {
+ target = numdirtybuffers - lodirtybuffers;
+ if (flushdeps && target > 2)
+ target /= 2;
+ } else
+ target = flushbufqtarget;
flushed = 0;
bp = NULL;
+ sentinel.b_qindex = QUEUE_SENTINEL;
mtx_lock(&bqlock);
- TAILQ_INSERT_TAIL(&bufqueues[queue], &sentinel, b_freelist);
+ TAILQ_INSERT_HEAD(&bufqueues[queue], &sentinel, b_freelist);
while (flushed != target) {
- bp = TAILQ_FIRST(&bufqueues[queue]);
- if (bp == &sentinel)
+ bp = TAILQ_NEXT(&sentinel, b_freelist);
+ if (bp != NULL) {
+ TAILQ_REMOVE(&bufqueues[queue], &sentinel, b_freelist);
+ TAILQ_INSERT_AFTER(&bufqueues[queue], bp, &sentinel,
+ b_freelist);
+ } else
break;
- TAILQ_REMOVE(&bufqueues[queue], bp, b_freelist);
- TAILQ_INSERT_TAIL(&bufqueues[queue], bp, b_freelist);
-
+ /*
+ * Skip sentinels inserted by other invocations of the
+ * flushbufqueues(), taking care to not reorder them.
+ */
+ if (bp->b_qindex == QUEUE_SENTINEL)
+ continue;
+ /*
+ * Only flush the buffers that belong to the
+ * vnode locked by the curthread.
+ */
+ if (lvp != NULL && bp->b_vp != lvp)
+ continue;
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
continue;
if (bp->b_pin_count > 0) {
@@ -2212,16 +2270,28 @@ flushbufqueues(int queue, int flushdeps)
BUF_UNLOCK(bp);
continue;
}
- if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT, td) == 0) {
+ if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_CANRECURSE,
+ td) == 0) {
mtx_unlock(&bqlock);
CTR3(KTR_BUF, "flushbufqueue(%p) vp %p flags %X",
bp, bp->b_vp, bp->b_flags);
- vfs_bio_awrite(bp);
+ if (curproc == bufdaemonproc)
+ vfs_bio_awrite(bp);
+ else {
+ bremfree(bp);
+ bwrite(bp);
+ }
vn_finished_write(mp);
VOP_UNLOCK(vp, 0, td);
flushwithdeps += hasdeps;
flushed++;
- waitrunningbufspace();
+
+ /*
+ * Sleeping on runningbufspace while holding
+ * vnode lock leads to deadlock.
+ */
+ if (curproc == bufdaemonproc)
+ waitrunningbufspace();
numdirtywakeup((lodirtybuffers + hidirtybuffers) / 2);
mtx_lock(&bqlock);
continue;
@@ -2603,7 +2673,7 @@ loop:
maxsize = vmio ? size + (offset & PAGE_MASK) : size;
maxsize = imax(maxsize, bsize);
- bp = getnewbuf(slpflag, slptimeo, size, maxsize);
+ bp = getnewbuf(vp, slpflag, slptimeo, size, maxsize, flags);
if (bp == NULL) {
if (slpflag || slptimeo)
return NULL;
@@ -2678,14 +2748,17 @@ loop:
* set to B_INVAL.
*/
struct buf *
-geteblk(int size)
+geteblk(int size, int flags)
{
struct buf *bp;
int maxsize;
maxsize = (size + BKVAMASK) & ~BKVAMASK;
- while ((bp = getnewbuf(0, 0, size, maxsize)) == 0)
- continue;
+ while ((bp = getnewbuf(NULL, 0, 0, size, maxsize, flags)) == NULL) {
+ if ((flags & GB_NOWAIT_BD) &&
+ (curthread->td_pflags & TDP_BUFNEED) != 0)
+ return (NULL);
+ }
allocbuf(bp, size);
bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */
KASSERT(BUF_REFCNT(bp) == 1, ("geteblk: bp %p not locked",bp));
Modified: stable/7/sys/sys/buf.h
==============================================================================
--- stable/7/sys/sys/buf.h Tue May 5 09:24:20 2009 (r191812)
+++ stable/7/sys/sys/buf.h Tue May 5 10:34:43 2009 (r191813)
@@ -475,6 +475,7 @@ buf_countdeps(struct buf *bp, int i)
*/
#define GB_LOCK_NOWAIT 0x0001 /* Fail if we block on a buf lock. */
#define GB_NOCREAT 0x0002 /* Don't create a buf if not found. */
+#define GB_NOWAIT_BD 0x0004 /* Do not wait for bufdaemon */
#ifdef _KERNEL
extern int nbuf; /* The number of buffer headers */
@@ -519,7 +520,7 @@ struct buf * getpbuf(int *);
struct buf *incore(struct bufobj *, daddr_t);
struct buf *gbincore(struct bufobj *, daddr_t);
struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
-struct buf *geteblk(int);
+struct buf *geteblk(int, int);
int bufwait(struct buf *);
int bufwrite(struct buf *);
void bufdone(struct buf *);
Modified: stable/7/sys/sys/proc.h
==============================================================================
--- stable/7/sys/sys/proc.h Tue May 5 09:24:20 2009 (r191812)
+++ stable/7/sys/sys/proc.h Tue May 5 10:34:43 2009 (r191813)
@@ -380,6 +380,7 @@ do { \
#define TDP_INBDFLUSH 0x00100000 /* Already in BO_BDFLUSH, do not recurse */
#define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */
#define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */
+#define TDP_BUFNEED 0x02000000 /* Do not recurse into the buf flush */
/*
* Reasons that the current thread can not be run yet.
Modified: stable/7/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- stable/7/sys/ufs/ffs/ffs_vfsops.c Tue May 5 09:24:20 2009 (r191812)
+++ stable/7/sys/ufs/ffs/ffs_vfsops.c Tue May 5 10:34:43 2009 (r191813)
@@ -1842,7 +1842,9 @@ ffs_bufwrite(struct buf *bp)
("bufwrite: needs chained iodone (%p)", bp->b_iodone));
/* get a new block */
- newbp = geteblk(bp->b_bufsize);
+ newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
+ if (newbp == NULL)
+ goto normal_write;
/*
* set it to be identical to the old block. We have to
@@ -1882,6 +1884,7 @@ ffs_bufwrite(struct buf *bp)
}
/* Let the normal bufwrite do the rest for us */
+normal_write:
return (bufwrite(bp));
}
More information about the svn-src-stable
mailing list