svn commit: r256027 - stable/9/sbin/hastd
Mikolaj Golub
trociny at FreeBSD.org
Thu Oct 3 18:52:06 UTC 2013
Author: trociny
Date: Thu Oct 3 18:52:04 2013
New Revision: 256027
URL: http://svnweb.freebsd.org/changeset/base/256027
Log:
MFC r255714, r255716, r255717:
r255714:
Use cv_broadcast() instead of cv_signal() when waking up threads
waiting on an empty queue as the queue may have several consumers.
Before the fix the following scenario was possible: 2 threads are
waiting on empty queue, 2 threads are inserting simultaneously. The
first inserting thread detects that the queue is empty and is going to
send the signal, but before it sends the second thread inserts
too. When the first sends the signal only one of the waiting threads
receives it while the other one may wait forever.
The scenario above is believed to be the cause of the observed
cases, when ggate_recv_thread() was getting stuck on taking free
request, while the free queue was not empty.
Reviewed by: pjd
Tested by: Yamagi Burmeister yamagi.org
r255716:
When updating the map of dirty extents, most recently used extents are
kept dirty to reduce the number of on-disk metadata updates. The
sequence of operations is:
1) acquire the activemap lock;
2) update in-memory map;
3) if the list of keepdirty extents is changed, update on-disk metadata;
4) release the lock.
On-disk updates are not frequent in comparison with in-memory updates,
while requiring much more time. So situations are possible when one
thread is updating on-disk metadata and another one is waiting for the
activemap lock just to update the in-memory map.
Improve this by introducing additional, on-disk map lock: when
in-memory map is updated and it is detected that the on-disk map needs
update too, the on-disk map lock is acquired and the in-memory map lock is
released before flushing the map.
Reported by: Yamagi Burmeister yamagi.org
Tested by: Yamagi Burmeister yamagi.org
Reviewed by: pjd
r255717:
Fix comments.
Modified:
stable/9/sbin/hastd/hast.h
stable/9/sbin/hastd/primary.c
stable/9/sbin/hastd/secondary.c
Directory Properties:
stable/9/sbin/hastd/ (props changed)
Modified: stable/9/sbin/hastd/hast.h
==============================================================================
--- stable/9/sbin/hastd/hast.h Thu Oct 3 18:50:09 2013 (r256026)
+++ stable/9/sbin/hastd/hast.h Thu Oct 3 18:52:04 2013 (r256027)
@@ -226,8 +226,10 @@ struct hast_resource {
/* Activemap structure. */
struct activemap *hr_amp;
- /* Locked used to synchronize access to hr_amp. */
+ /* Lock used to synchronize access to hr_amp. */
pthread_mutex_t hr_amp_lock;
+ /* Lock used to synchronize access to hr_amp diskmap. */
+ pthread_mutex_t hr_amp_diskmap_lock;
/* Number of BIO_READ requests. */
uint64_t hr_stat_read;
Modified: stable/9/sbin/hastd/primary.c
==============================================================================
--- stable/9/sbin/hastd/primary.c Thu Oct 3 18:50:09 2013 (r256026)
+++ stable/9/sbin/hastd/primary.c Thu Oct 3 18:52:04 2013 (r256027)
@@ -172,7 +172,7 @@ static pthread_mutex_t metadata_lock;
hio_next[(ncomp)]); \
mtx_unlock(&hio_##name##_list_lock[ncomp]); \
if (_wakeup) \
- cv_signal(&hio_##name##_list_cond[(ncomp)]); \
+ cv_broadcast(&hio_##name##_list_cond[(ncomp)]); \
} while (0)
#define QUEUE_INSERT2(hio, name) do { \
bool _wakeup; \
@@ -182,7 +182,7 @@ static pthread_mutex_t metadata_lock;
TAILQ_INSERT_TAIL(&hio_##name##_list, (hio), hio_##name##_next);\
mtx_unlock(&hio_##name##_list_lock); \
if (_wakeup) \
- cv_signal(&hio_##name##_list_cond); \
+ cv_broadcast(&hio_##name##_list_cond); \
} while (0)
#define QUEUE_TAKE1(hio, name, ncomp, timeout) do { \
bool _last; \
@@ -291,22 +291,28 @@ primary_exitx(int exitcode, const char *
exit(exitcode);
}
+/* Expects res->hr_amp locked, returns unlocked. */
static int
hast_activemap_flush(struct hast_resource *res)
{
const unsigned char *buf;
size_t size;
+ int ret;
+ mtx_lock(&res->hr_amp_diskmap_lock);
buf = activemap_bitmap(res->hr_amp, &size);
+ mtx_unlock(&res->hr_amp_lock);
PJDLOG_ASSERT(buf != NULL);
PJDLOG_ASSERT((size % res->hr_local_sectorsize) == 0);
+ ret = 0;
if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) !=
(ssize_t)size) {
pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk");
res->hr_stat_activemap_write_error++;
- return (-1);
+ ret = -1;
}
- if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) {
+ if (ret == 0 && res->hr_metaflush == 1 &&
+ g_flush(res->hr_localfd) == -1) {
if (errno == EOPNOTSUPP) {
pjdlog_warning("The %s provider doesn't support flushing write cache. Disabling it.",
res->hr_localpath);
@@ -315,10 +321,11 @@ hast_activemap_flush(struct hast_resourc
pjdlog_errno(LOG_ERR,
"Unable to flush disk cache on activemap update");
res->hr_stat_activemap_flush_error++;
- return (-1);
+ ret = -1;
}
}
- return (0);
+ mtx_unlock(&res->hr_amp_diskmap_lock);
+ return (ret);
}
static bool
@@ -783,6 +790,7 @@ init_remote(struct hast_resource *res, s
* Now that we merged bitmaps from both nodes, flush it to the
* disk before we start to synchronize.
*/
+ mtx_lock(&res->hr_amp_lock);
(void)hast_activemap_flush(res);
}
nv_free(nvin);
@@ -1288,8 +1296,9 @@ ggate_recv_thread(void *arg)
ggio->gctl_offset, ggio->gctl_length)) {
res->hr_stat_activemap_update++;
(void)hast_activemap_flush(res);
+ } else {
+ mtx_unlock(&res->hr_amp_lock);
}
- mtx_unlock(&res->hr_amp_lock);
break;
case BIO_DELETE:
res->hr_stat_delete++;
@@ -1649,8 +1658,9 @@ done_queue:
if (activemap_need_sync(res->hr_amp, ggio->gctl_offset,
ggio->gctl_length)) {
(void)hast_activemap_flush(res);
+ } else {
+ mtx_unlock(&res->hr_amp_lock);
}
- mtx_unlock(&res->hr_amp_lock);
if (hio->hio_replication == HAST_REPLICATION_MEMSYNC)
(void)refcnt_release(&hio->hio_countdown);
}
@@ -1917,8 +1927,9 @@ ggate_send_thread(void *arg)
ggio->gctl_offset, ggio->gctl_length)) {
res->hr_stat_activemap_update++;
(void)hast_activemap_flush(res);
+ } else {
+ mtx_unlock(&res->hr_amp_lock);
}
- mtx_unlock(&res->hr_amp_lock);
}
if (ggio->gctl_cmd == BIO_WRITE) {
/*
@@ -2014,8 +2025,11 @@ sync_thread(void *arg __unused)
*/
if (activemap_extent_complete(res->hr_amp, syncext))
(void)hast_activemap_flush(res);
+ else
+ mtx_unlock(&res->hr_amp_lock);
+ } else {
+ mtx_unlock(&res->hr_amp_lock);
}
- mtx_unlock(&res->hr_amp_lock);
if (dorewind) {
dorewind = false;
if (offset == -1)
Modified: stable/9/sbin/hastd/secondary.c
==============================================================================
--- stable/9/sbin/hastd/secondary.c Thu Oct 3 18:50:09 2013 (r256026)
+++ stable/9/sbin/hastd/secondary.c Thu Oct 3 18:52:04 2013 (r256027)
@@ -85,14 +85,13 @@ static TAILQ_HEAD(, hio) hio_free_list;
static pthread_mutex_t hio_free_list_lock;
static pthread_cond_t hio_free_list_cond;
/*
- * Disk thread (the one that do I/O requests) takes requests from this list.
+ * Disk thread (the one that does I/O requests) takes requests from this list.
*/
static TAILQ_HEAD(, hio) hio_disk_list;
static pthread_mutex_t hio_disk_list_lock;
static pthread_cond_t hio_disk_list_cond;
/*
- * There is one recv list for every component, although local components don't
- * use recv lists as local requests are done synchronously.
+ * Thread that sends requests back to primary takes requests from this list.
*/
static TAILQ_HEAD(, hio) hio_send_list;
static pthread_mutex_t hio_send_list_lock;
@@ -115,7 +114,7 @@ static void *send_thread(void *arg);
TAILQ_INSERT_TAIL(&hio_##name##_list, (hio), hio_next); \
mtx_unlock(&hio_##name##_list_lock); \
if (_wakeup) \
- cv_signal(&hio_##name##_list_cond); \
+ cv_broadcast(&hio_##name##_list_cond); \
} while (0)
#define QUEUE_TAKE(name, hio) do { \
mtx_lock(&hio_##name##_list_lock); \
More information about the svn-src-stable-9
mailing list