svn commit: r245775 - stable/9/sys/ufs/ffs
Scott Long
scottl at FreeBSD.org
Tue Jan 22 07:18:34 UTC 2013
Author: scottl
Date: Tue Jan 22 07:18:33 2013
New Revision: 245775
URL: http://svnweb.freebsd.org/changeset/base/245775
Log:
MFC r242734, 242815:
- Implement BIO_FLUSH support around journal entries. This will not 100%
solve power loss problems with dishonest write caches. However, it
should improve the situation and force a full fsck when it is unable
to resolve with the journal.
- Resolve a case where the journal could wrap in an unsafe way causing
us to prematurely lose journal entries in very specific scenarios.
- Correct rev 242734, segments can sometimes get stuck. Be a bit more
defensive with segment state.
Modified:
stable/9/sys/ufs/ffs/ffs_softdep.c
Directory Properties:
stable/9/sys/ (props changed)
Modified: stable/9/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- stable/9/sys/ufs/ffs/ffs_softdep.c Tue Jan 22 07:10:26 2013 (r245774)
+++ stable/9/sys/ufs/ffs/ffs_softdep.c Tue Jan 22 07:18:33 2013 (r245775)
@@ -88,6 +88,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
+#include <geom/geom.h>
+
#include <ddb/ddb.h>
#ifndef SOFTUPDATES
@@ -801,6 +803,7 @@ static void handle_written_jnewblk(struc
static void handle_written_jblkdep(struct jblkdep *);
static void handle_written_jfreefrag(struct jfreefrag *);
static void complete_jseg(struct jseg *);
+static void complete_jsegs(struct jseg *);
static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *);
static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *);
static void jremref_write(struct jremref *, struct jseg *, uint8_t *);
@@ -1226,6 +1229,7 @@ static struct callout softdep_callout;
static int req_pending;
static int req_clear_inodedeps; /* syncer process flush some inodedeps */
static int req_clear_remove; /* syncer process flush some freeblks */
+static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
/*
* runtime statistics
@@ -1309,6 +1313,8 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, cle
&stat_cleanup_retries, 0, "");
SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_failures, CTLFLAG_RW,
&stat_cleanup_failures, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, flushcache, CTLFLAG_RW,
+ &softdep_flushcache, 0, "");
SYSCTL_DECL(_vfs_ffs);
@@ -3083,6 +3089,67 @@ softdep_flushjournal(mp)
FREE_LOCK(&lk);
}
+static void softdep_synchronize_completed(struct bio *);
+static void softdep_synchronize(struct bio *, struct ufsmount *, void *);
+
+static void
+softdep_synchronize_completed(bp)
+ struct bio *bp;
+{
+ struct jseg *oldest;
+ struct jseg *jseg;
+
+ /*
+ * caller1 marks the last segment written before we issued the
+ * synchronize cache.
+ */
+ jseg = bp->bio_caller1;
+ oldest = NULL;
+ ACQUIRE_LOCK(&lk);
+ /*
+ * Mark all the journal entries waiting on the synchronize cache
+ * as completed so they may continue on.
+ */
+ while (jseg != NULL && (jseg->js_state & COMPLETE) == 0) {
+ jseg->js_state |= COMPLETE;
+ oldest = jseg;
+ jseg = TAILQ_PREV(jseg, jseglst, js_next);
+ }
+ /*
+ * Restart deferred journal entry processing from the oldest
+ * completed jseg.
+ */
+ if (oldest)
+ complete_jsegs(oldest);
+
+ FREE_LOCK(&lk);
+ g_destroy_bio(bp);
+}
+
+/*
+ * Send BIO_FLUSH/SYNCHRONIZE CACHE to the device to enforce write ordering
+ * barriers. The journal must be written prior to any blocks that depend
+ * on it and the journal cannot be released until the blocks have been
+ * written. This code handles both barriers simultaneously.
+ */
+static void
+softdep_synchronize(bp, ump, caller1)
+ struct bio *bp;
+ struct ufsmount *ump;
+ void *caller1;
+{
+
+ bp->bio_cmd = BIO_FLUSH;
+ bp->bio_flags |= BIO_ORDERED;
+ bp->bio_data = NULL;
+ bp->bio_offset = ump->um_cp->provider->mediasize;
+ bp->bio_length = 0;
+ bp->bio_done = softdep_synchronize_completed;
+ bp->bio_caller1 = caller1;
+ g_io_request(bp,
+ (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private);
+}
+
/*
* Flush some journal records to disk.
*/
@@ -3097,8 +3164,10 @@ softdep_process_journal(mp, needwk, flag
struct worklist *wk;
struct jseg *jseg;
struct buf *bp;
+ struct bio *bio;
uint8_t *data;
struct fs *fs;
+ int shouldflush;
int segwritten;
int jrecmin; /* Minimum records per block. */
int jrecmax; /* Maximum records per block. */
@@ -3109,6 +3178,9 @@ softdep_process_journal(mp, needwk, flag
if (MOUNTEDSUJ(mp) == 0)
return;
+ shouldflush = softdep_flushcache;
+ bio = NULL;
+ jseg = NULL;
ump = VFSTOUFS(mp);
fs = ump->um_fs;
jblocks = ump->softdep_jblocks;
@@ -3157,6 +3229,10 @@ softdep_process_journal(mp, needwk, flag
LIST_INIT(&jseg->js_entries);
LIST_INIT(&jseg->js_indirs);
jseg->js_state = ATTACHED;
+ if (shouldflush == 0)
+ jseg->js_state |= COMPLETE;
+ else if (bio == NULL)
+ bio = g_alloc_bio();
jseg->js_jblocks = jblocks;
bp = geteblk(fs->fs_bsize, 0);
ACQUIRE_LOCK(&lk);
@@ -3289,6 +3365,17 @@ softdep_process_journal(mp, needwk, flag
ACQUIRE_LOCK(&lk);
}
/*
+ * If we wrote a segment issue a synchronize cache so the journal
+ * is reflected on disk before the data is written. Since reclaiming
+ * journal space also requires writing a journal record this
+ * process also enforces a barrier before reclamation.
+ */
+ if (segwritten && shouldflush) {
+ softdep_synchronize(bio, ump,
+ TAILQ_LAST(&jblocks->jb_segs, jseglst));
+ } else if (bio)
+ g_destroy_bio(bio);
+ /*
* If we've suspended the filesystem because we ran out of journal
* space either try to sync it here to make some progress or
* unsuspend it if we already have.
@@ -3371,25 +3458,17 @@ complete_jseg(jseg)
}
/*
- * Mark a jseg as DEPCOMPLETE and throw away the buffer. Handle jseg
- * completions in order only.
+ * Determine which jsegs are ready for completion processing. Waits for
+ * synchronize cache to complete as well as forcing in-order completion
+ * of journal entries.
*/
static void
-handle_written_jseg(jseg, bp)
+complete_jsegs(jseg)
struct jseg *jseg;
- struct buf *bp;
{
struct jblocks *jblocks;
struct jseg *jsegn;
- if (jseg->js_refs == 0)
- panic("handle_written_jseg: No self-reference on %p", jseg);
- jseg->js_state |= DEPCOMPLETE;
- /*
- * We'll never need this buffer again, set flags so it will be
- * discarded.
- */
- bp->b_flags |= B_INVAL | B_NOCACHE;
jblocks = jseg->js_jblocks;
/*
* Don't allow out of order completions. If this isn't the first
@@ -3398,12 +3477,12 @@ handle_written_jseg(jseg, bp)
if (jseg != jblocks->jb_writeseg)
return;
/* Iterate through available jsegs processing their entries. */
- do {
+ while (jseg && (jseg->js_state & ALLCOMPLETE) == ALLCOMPLETE) {
jblocks->jb_oldestwrseq = jseg->js_oldseq;
jsegn = TAILQ_NEXT(jseg, js_next);
complete_jseg(jseg);
jseg = jsegn;
- } while (jseg && jseg->js_state & DEPCOMPLETE);
+ }
jblocks->jb_writeseg = jseg;
/*
* Attempt to free jsegs now that oldestwrseq may have advanced.
@@ -3411,6 +3490,27 @@ handle_written_jseg(jseg, bp)
free_jsegs(jblocks);
}
+/*
+ * Mark a jseg as DEPCOMPLETE and throw away the buffer. Attempt to handle
+ * the final completions.
+ */
+static void
+handle_written_jseg(jseg, bp)
+ struct jseg *jseg;
+ struct buf *bp;
+{
+
+ if (jseg->js_refs == 0)
+ panic("handle_written_jseg: No self-reference on %p", jseg);
+ jseg->js_state |= DEPCOMPLETE;
+ /*
+ * We'll never need this buffer again, set flags so it will be
+ * discarded.
+ */
+ bp->b_flags |= B_INVAL | B_NOCACHE;
+ complete_jsegs(jseg);
+}
+
static inline struct jsegdep *
inoref_jseg(inoref)
struct inoref *inoref;
@@ -4196,8 +4296,16 @@ free_jsegs(jblocks)
jblocks->jb_oldestseg = jseg;
return;
}
- if (!LIST_EMPTY(&jseg->js_indirs) &&
- jseg->js_seq >= jblocks->jb_oldestwrseq)
+ if ((jseg->js_state & ALLCOMPLETE) != ALLCOMPLETE)
+ break;
+ if (jseg->js_seq > jblocks->jb_oldestwrseq)
+ break;
+ /*
+ * We can free jsegs that didn't write entries when
+ * oldestwrseq == js_seq.
+ */
+ if (jseg->js_seq == jblocks->jb_oldestwrseq &&
+ jseg->js_cnt != 0)
break;
free_jseg(jseg, jblocks);
}
More information about the svn-src-stable-9
mailing list