svn commit: r260007 - in stable/9/sbin: hastctl hastd
Mikolaj Golub
trociny at FreeBSD.org
Sat Dec 28 19:22:25 UTC 2013
Author: trociny
Date: Sat Dec 28 19:22:23 2013
New Revision: 260007
URL: http://svnweb.freebsd.org/changeset/base/260007
Log:
MFC r257155, r257582, r259191, r259192, r259193, r259194, r259195, r259196:
r257155:
Make hastctl list command output current queue sizes.
Reviewed by: pjd
r257582 (pjd):
Correct alignment.
r259191:
For memsync replication, hio_countdown is used not only as an
indication when a request can be moved to done queue, but also for
detecting the current state of memsync request.
This approach has problems, e.g. leaking a request if the memsync ack from
the secondary failed, or racy usage of write_complete, which should be
called only once per write request, but for memsync can be entered by
local_send_thread and ggate_send_thread simultaneously.
So the following approach is implemented instead:
1) Use hio_countdown only for counting components we are waiting for to
complete, i.e. initially it is always 2 for any replication mode.
2) To distinguish between "memsync ack" and "memsync fin" responses
from the secondary, add and use hio_memsyncacked field.
3) write_complete() in component threads is called only before
releasing hio_countdown (i.e. before the hio may be returned to the
done queue).
4) Add and use hio_writecount refcounter to detect when
write_complete() can be called in memsync case.
Reported by: Pete French petefrench ingresso.co.uk
Tested by: Pete French petefrench ingresso.co.uk
r259192:
Add some macros to make the code more readable (no functional changes).
r259193:
Fix compiler warnings.
r259194:
In remote_send_thread, if sending a request fails don't take the
request back from the receive queue -- it might already be processed
by remote_recv_thread, which led to crashes like below:
(primary) Unable to receive reply header: Connection reset by peer.
(primary) Unable to send request (Connection reset by peer):
WRITE(954662912, 131072).
(primary) Disconnected from kopusha:7772.
(primary) Increasing localcnt to 1.
(primary) Assertion failed: (old > 0), function refcnt_release,
file refcnt.h, line 62.
Taking the request back was not necessary (it would properly be
processed by the remote_recv_thread) and only complicated things.
r259195:
Send wakeup to threads waiting on empty queue before releasing the
lock to decrease spurious wakeups.
Submitted by: davidxu
r259196:
Check remote protocol version only for the first connection (when it
is actually sent by the remote node).
Otherwise it generated confusing "Negotiated protocol version 1" debug
messages when processing the second connection.
Modified:
stable/9/sbin/hastctl/hastctl.c
stable/9/sbin/hastd/control.c
stable/9/sbin/hastd/hast.h
stable/9/sbin/hastd/hastd.8
stable/9/sbin/hastd/hastd.c
stable/9/sbin/hastd/nv.c
stable/9/sbin/hastd/primary.c
stable/9/sbin/hastd/proto.c
stable/9/sbin/hastd/secondary.c
Directory Properties:
stable/9/sbin/hastctl/ (props changed)
stable/9/sbin/hastd/ (props changed)
Modified: stable/9/sbin/hastctl/hastctl.c
==============================================================================
--- stable/9/sbin/hastctl/hastctl.c Sat Dec 28 19:21:22 2013 (r260006)
+++ stable/9/sbin/hastctl/hastctl.c Sat Dec 28 19:22:23 2013 (r260007)
@@ -355,6 +355,13 @@ control_list(struct nv *nv)
(uintmax_t)nv_get_uint64(nv, "stat_write_error%u", ii),
(uintmax_t)nv_get_uint64(nv, "stat_delete_error%u", ii),
(uintmax_t)nv_get_uint64(nv, "stat_flush_error%u", ii));
+ printf(" queues: "
+ "local: %ju, send: %ju, recv: %ju, done: %ju, idle: %ju\n",
+ (uintmax_t)nv_get_uint64(nv, "local_queue_size%u", ii),
+ (uintmax_t)nv_get_uint64(nv, "send_queue_size%u", ii),
+ (uintmax_t)nv_get_uint64(nv, "recv_queue_size%u", ii),
+ (uintmax_t)nv_get_uint64(nv, "done_queue_size%u", ii),
+ (uintmax_t)nv_get_uint64(nv, "idle_queue_size%u", ii));
}
return (ret);
}
Modified: stable/9/sbin/hastd/control.c
==============================================================================
--- stable/9/sbin/hastd/control.c Sat Dec 28 19:21:22 2013 (r260006)
+++ stable/9/sbin/hastd/control.c Sat Dec 28 19:22:23 2013 (r260007)
@@ -215,6 +215,16 @@ control_status_worker(struct hast_resour
"stat_delete_error%u", no);
nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush_error"),
"stat_flush_error%u", no);
+ nv_add_uint64(nvout, nv_get_uint64(cnvin, "idle_queue_size"),
+ "idle_queue_size%u", no);
+ nv_add_uint64(nvout, nv_get_uint64(cnvin, "local_queue_size"),
+ "local_queue_size%u", no);
+ nv_add_uint64(nvout, nv_get_uint64(cnvin, "send_queue_size"),
+ "send_queue_size%u", no);
+ nv_add_uint64(nvout, nv_get_uint64(cnvin, "recv_queue_size"),
+ "recv_queue_size%u", no);
+ nv_add_uint64(nvout, nv_get_uint64(cnvin, "done_queue_size"),
+ "done_queue_size%u", no);
end:
if (cnvin != NULL)
nv_free(cnvin);
@@ -478,6 +488,7 @@ ctrl_thread(void *arg)
nv_add_uint64(nvout, res->hr_stat_flush_error +
res->hr_stat_activemap_flush_error,
"stat_flush_error");
+ res->output_status_aux(nvout);
nv_add_int16(nvout, 0, "error");
break;
case CONTROL_RELOAD:
Modified: stable/9/sbin/hastd/hast.h
==============================================================================
--- stable/9/sbin/hastd/hast.h Sat Dec 28 19:21:22 2013 (r260006)
+++ stable/9/sbin/hastd/hast.h Sat Dec 28 19:22:23 2013 (r260007)
@@ -137,6 +137,8 @@ struct hastd_config {
#define HAST_CHECKSUM_CRC32 1
#define HAST_CHECKSUM_SHA256 2
+struct nv;
+
/*
* Structure that describes single resource.
*/
@@ -254,6 +256,9 @@ struct hast_resource {
/* Number of activemap flush errors. */
uint64_t hr_stat_activemap_flush_error;
+ /* Function to output worker specific info on control status request. */
+ void (*output_status_aux)(struct nv *);
+
/* Next resource. */
TAILQ_ENTRY(hast_resource) hr_next;
};
Modified: stable/9/sbin/hastd/hastd.8
==============================================================================
--- stable/9/sbin/hastd/hastd.8 Sat Dec 28 19:21:22 2013 (r260006)
+++ stable/9/sbin/hastd/hastd.8 Sat Dec 28 19:22:23 2013 (r260007)
@@ -171,7 +171,7 @@ The default location is
.Pa /var/run/hastd.pid .
.El
.Sh FILES
-.Bl -tag -width ".Pa /var/run/hastctl" -compact
+.Bl -tag -width ".Pa /var/run/hastd.pid" -compact
.It Pa /etc/hast.conf
The configuration file for
.Nm
Modified: stable/9/sbin/hastd/hastd.c
==============================================================================
--- stable/9/sbin/hastd/hastd.c Sat Dec 28 19:21:22 2013 (r260006)
+++ stable/9/sbin/hastd/hastd.c Sat Dec 28 19:22:23 2013 (r260007)
@@ -806,12 +806,6 @@ listen_accept(struct hastd_listen *lst)
*/
version = 1;
}
- if (version > HAST_PROTO_VERSION) {
- pjdlog_info("Remote protocol version %hhu is not supported, falling back to version %hhu.",
- version, (unsigned char)HAST_PROTO_VERSION);
- version = HAST_PROTO_VERSION;
- }
- pjdlog_debug(1, "Negotiated protocol version %hhu.", version);
token = nv_get_uint8_array(nvin, &size, "token");
/*
* NULL token means that this is first connection.
@@ -925,6 +919,12 @@ listen_accept(struct hastd_listen *lst)
*/
if (token == NULL) {
+ if (version > HAST_PROTO_VERSION) {
+ pjdlog_info("Remote protocol version %hhu is not supported, falling back to version %hhu.",
+ version, (unsigned char)HAST_PROTO_VERSION);
+ version = HAST_PROTO_VERSION;
+ }
+ pjdlog_debug(1, "Negotiated protocol version %hhu.", version);
res->hr_version = version;
arc4random_buf(res->hr_token, sizeof(res->hr_token));
nvout = nv_alloc();
Modified: stable/9/sbin/hastd/nv.c
==============================================================================
--- stable/9/sbin/hastd/nv.c Sat Dec 28 19:21:22 2013 (r260006)
+++ stable/9/sbin/hastd/nv.c Sat Dec 28 19:22:23 2013 (r260007)
@@ -566,7 +566,7 @@ nv_get_string(struct nv *nv, const char
return (NULL);
PJDLOG_ASSERT((nvh->nvh_type & NV_ORDER_MASK) == NV_ORDER_HOST);
PJDLOG_ASSERT(nvh->nvh_dsize >= 1);
- str = NVH_DATA(nvh);
+ str = (char *)NVH_DATA(nvh);
PJDLOG_ASSERT(str[nvh->nvh_dsize - 1] == '\0');
PJDLOG_ASSERT(strlen(str) == nvh->nvh_dsize - 1);
return (str);
Modified: stable/9/sbin/hastd/primary.c
==============================================================================
--- stable/9/sbin/hastd/primary.c Sat Dec 28 19:21:22 2013 (r260006)
+++ stable/9/sbin/hastd/primary.c Sat Dec 28 19:22:23 2013 (r260007)
@@ -94,6 +94,15 @@ struct hio {
*/
bool hio_done;
/*
+ * Number of components we are still waiting before sending write
+ * completion ack to GEOM Gate. Used for memsync.
+ */
+ unsigned int hio_writecount;
+ /*
+ * Memsync request was acknowleged by remote.
+ */
+ bool hio_memsyncacked;
+ /*
* Remember replication from the time the request was initiated,
* so we won't get confused when replication changes on reload.
*/
@@ -108,6 +117,7 @@ struct hio {
* until some in-progress requests are freed.
*/
static TAILQ_HEAD(, hio) hio_free_list;
+static size_t hio_free_list_size;
static pthread_mutex_t hio_free_list_lock;
static pthread_cond_t hio_free_list_cond;
/*
@@ -116,20 +126,26 @@ static pthread_cond_t hio_free_list_cond
* responsible for managing his own send list.
*/
static TAILQ_HEAD(, hio) *hio_send_list;
+static size_t *hio_send_list_size;
static pthread_mutex_t *hio_send_list_lock;
static pthread_cond_t *hio_send_list_cond;
+#define hio_send_local_list_size hio_send_list_size[0]
+#define hio_send_remote_list_size hio_send_list_size[1]
/*
* There is one recv list for every component, although local components don't
* use recv lists as local requests are done synchronously.
*/
static TAILQ_HEAD(, hio) *hio_recv_list;
+static size_t *hio_recv_list_size;
static pthread_mutex_t *hio_recv_list_lock;
static pthread_cond_t *hio_recv_list_cond;
+#define hio_recv_remote_list_size hio_recv_list_size[1]
/*
* Request is placed on done list by the slowest component (the one that
* decreased hio_countdown from 1 to 0).
*/
static TAILQ_HEAD(, hio) hio_done_list;
+static size_t hio_done_list_size;
static pthread_mutex_t hio_done_list_lock;
static pthread_cond_t hio_done_list_cond;
/*
@@ -164,25 +180,21 @@ static pthread_mutex_t metadata_lock;
((res)->hr_remotein != NULL && (res)->hr_remoteout != NULL)
#define QUEUE_INSERT1(hio, name, ncomp) do { \
- bool _wakeup; \
- \
mtx_lock(&hio_##name##_list_lock[(ncomp)]); \
- _wakeup = TAILQ_EMPTY(&hio_##name##_list[(ncomp)]); \
+ if (TAILQ_EMPTY(&hio_##name##_list[(ncomp)])) \
+ cv_broadcast(&hio_##name##_list_cond[(ncomp)]); \
TAILQ_INSERT_TAIL(&hio_##name##_list[(ncomp)], (hio), \
hio_next[(ncomp)]); \
- mtx_unlock(&hio_##name##_list_lock[ncomp]); \
- if (_wakeup) \
- cv_broadcast(&hio_##name##_list_cond[(ncomp)]); \
+ hio_##name##_list_size[(ncomp)]++; \
+ mtx_unlock(&hio_##name##_list_lock[(ncomp)]); \
} while (0)
#define QUEUE_INSERT2(hio, name) do { \
- bool _wakeup; \
- \
mtx_lock(&hio_##name##_list_lock); \
- _wakeup = TAILQ_EMPTY(&hio_##name##_list); \
+ if (TAILQ_EMPTY(&hio_##name##_list)) \
+ cv_broadcast(&hio_##name##_list_cond); \
TAILQ_INSERT_TAIL(&hio_##name##_list, (hio), hio_##name##_next);\
+ hio_##name##_list_size++; \
mtx_unlock(&hio_##name##_list_lock); \
- if (_wakeup) \
- cv_broadcast(&hio_##name##_list_cond); \
} while (0)
#define QUEUE_TAKE1(hio, name, ncomp, timeout) do { \
bool _last; \
@@ -196,6 +208,8 @@ static pthread_mutex_t metadata_lock;
_last = true; \
} \
if (hio != NULL) { \
+ PJDLOG_ASSERT(hio_##name##_list_size[(ncomp)] != 0); \
+ hio_##name##_list_size[(ncomp)]--; \
TAILQ_REMOVE(&hio_##name##_list[(ncomp)], (hio), \
hio_next[(ncomp)]); \
} \
@@ -207,10 +221,16 @@ static pthread_mutex_t metadata_lock;
cv_wait(&hio_##name##_list_cond, \
&hio_##name##_list_lock); \
} \
+ PJDLOG_ASSERT(hio_##name##_list_size != 0); \
+ hio_##name##_list_size--; \
TAILQ_REMOVE(&hio_##name##_list, (hio), hio_##name##_next); \
mtx_unlock(&hio_##name##_list_lock); \
} while (0)
+#define ISFULLSYNC(hio) ((hio)->hio_replication == HAST_REPLICATION_FULLSYNC)
+#define ISMEMSYNC(hio) ((hio)->hio_replication == HAST_REPLICATION_MEMSYNC)
+#define ISASYNC(hio) ((hio)->hio_replication == HAST_REPLICATION_ASYNC)
+
#define SYNCREQ(hio) do { \
(hio)->hio_ggio.gctl_unit = -1; \
(hio)->hio_ggio.gctl_seq = 1; \
@@ -219,6 +239,9 @@ static pthread_mutex_t metadata_lock;
#define SYNCREQDONE(hio) do { (hio)->hio_ggio.gctl_unit = -2; } while (0)
#define ISSYNCREQDONE(hio) ((hio)->hio_ggio.gctl_unit == -2)
+#define ISMEMSYNCWRITE(hio) (ISMEMSYNC(hio) && \
+ (hio)->hio_ggio.gctl_cmd == BIO_WRITE && !ISSYNCREQ(hio))
+
static struct hast_resource *gres;
static pthread_mutex_t range_lock;
@@ -239,6 +262,22 @@ static void *sync_thread(void *arg);
static void *guard_thread(void *arg);
static void
+output_status_aux(struct nv *nvout)
+{
+
+ nv_add_uint64(nvout, (uint64_t)hio_free_list_size,
+ "idle_queue_size");
+ nv_add_uint64(nvout, (uint64_t)hio_send_local_list_size,
+ "local_queue_size");
+ nv_add_uint64(nvout, (uint64_t)hio_send_remote_list_size,
+ "send_queue_size");
+ nv_add_uint64(nvout, (uint64_t)hio_recv_remote_list_size,
+ "recv_queue_size");
+ nv_add_uint64(nvout, (uint64_t)hio_done_list_size,
+ "done_queue_size");
+}
+
+static void
cleanup(struct hast_resource *res)
{
int rerrno;
@@ -355,6 +394,12 @@ init_environment(struct hast_resource *r
"Unable to allocate %zu bytes of memory for send lists.",
sizeof(hio_send_list[0]) * ncomps);
}
+ hio_send_list_size = malloc(sizeof(hio_send_list_size[0]) * ncomps);
+ if (hio_send_list_size == NULL) {
+ primary_exitx(EX_TEMPFAIL,
+ "Unable to allocate %zu bytes of memory for send list counters.",
+ sizeof(hio_send_list_size[0]) * ncomps);
+ }
hio_send_list_lock = malloc(sizeof(hio_send_list_lock[0]) * ncomps);
if (hio_send_list_lock == NULL) {
primary_exitx(EX_TEMPFAIL,
@@ -373,6 +418,12 @@ init_environment(struct hast_resource *r
"Unable to allocate %zu bytes of memory for recv lists.",
sizeof(hio_recv_list[0]) * ncomps);
}
+ hio_recv_list_size = malloc(sizeof(hio_recv_list_size[0]) * ncomps);
+ if (hio_recv_list_size == NULL) {
+ primary_exitx(EX_TEMPFAIL,
+ "Unable to allocate %zu bytes of memory for recv list counters.",
+ sizeof(hio_recv_list_size[0]) * ncomps);
+ }
hio_recv_list_lock = malloc(sizeof(hio_recv_list_lock[0]) * ncomps);
if (hio_recv_list_lock == NULL) {
primary_exitx(EX_TEMPFAIL,
@@ -393,16 +444,18 @@ init_environment(struct hast_resource *r
}
/*
- * Initialize lists, their locks and theirs condition variables.
+ * Initialize lists, their counters, locks and condition variables.
*/
TAILQ_INIT(&hio_free_list);
mtx_init(&hio_free_list_lock);
cv_init(&hio_free_list_cond);
for (ii = 0; ii < HAST_NCOMPONENTS; ii++) {
TAILQ_INIT(&hio_send_list[ii]);
+ hio_send_list_size[ii] = 0;
mtx_init(&hio_send_list_lock[ii]);
cv_init(&hio_send_list_cond[ii]);
TAILQ_INIT(&hio_recv_list[ii]);
+ hio_recv_list_size[ii] = 0;
mtx_init(&hio_recv_list_lock[ii]);
cv_init(&hio_recv_list_cond[ii]);
rw_init(&hio_remote_lock[ii]);
@@ -445,6 +498,7 @@ init_environment(struct hast_resource *r
hio->hio_ggio.gctl_length = MAXPHYS;
hio->hio_ggio.gctl_error = 0;
TAILQ_INSERT_HEAD(&hio_free_list, hio, hio_free_next);
+ hio_free_list_size++;
}
}
@@ -963,6 +1017,7 @@ hastd_primary(struct hast_resource *res)
}
gres = res;
+ res->output_status_aux = output_status_aux;
mode = pjdlog_mode_get();
debuglevel = pjdlog_debug_get();
@@ -1299,6 +1354,10 @@ ggate_recv_thread(void *arg)
} else {
mtx_unlock(&res->hr_amp_lock);
}
+ if (ISMEMSYNC(hio)) {
+ hio->hio_memsyncacked = false;
+ hio->hio_writecount = ncomps;
+ }
break;
case BIO_DELETE:
res->hr_stat_delete++;
@@ -1310,11 +1369,6 @@ ggate_recv_thread(void *arg)
pjdlog_debug(2,
"ggate_recv: (%p) Moving request to the send queues.", hio);
hio->hio_countdown = ncomps;
- if (hio->hio_replication == HAST_REPLICATION_MEMSYNC &&
- ggio->gctl_cmd == BIO_WRITE) {
- /* Each remote request needs two responses in memsync. */
- hio->hio_countdown++;
- }
for (ii = ncomp; ii < ncomps; ii++)
QUEUE_INSERT1(hio, send, ii);
}
@@ -1385,8 +1439,7 @@ local_send_thread(void *arg)
ret, (intmax_t)ggio->gctl_length);
} else {
hio->hio_errors[ncomp] = 0;
- if (hio->hio_replication ==
- HAST_REPLICATION_ASYNC) {
+ if (ISASYNC(hio)) {
ggio->gctl_error = 0;
write_complete(res, hio);
}
@@ -1424,42 +1477,13 @@ local_send_thread(void *arg)
}
break;
}
-
- if (hio->hio_replication != HAST_REPLICATION_MEMSYNC ||
- ggio->gctl_cmd != BIO_WRITE || ISSYNCREQ(hio)) {
- if (refcnt_release(&hio->hio_countdown) > 0)
- continue;
- } else {
- /*
- * Depending on hio_countdown value, requests finished
- * in the following order:
- * 0: remote memsync, remote final, local write
- * 1: remote memsync, local write, (remote final)
- * 2: local write, (remote memsync), (remote final)
- */
- switch (refcnt_release(&hio->hio_countdown)) {
- case 0:
- /*
- * Local write finished as last.
- */
- break;
- case 1:
- /*
- * Local write finished after remote memsync
- * reply arrvied. We can complete the write now.
- */
- if (hio->hio_errors[0] == 0)
- write_complete(res, hio);
- continue;
- case 2:
- /*
- * Local write finished as first.
- */
- continue;
- default:
- PJDLOG_ABORT("Invalid hio_countdown.");
+ if (ISMEMSYNCWRITE(hio)) {
+ if (refcnt_release(&hio->hio_writecount) == 0) {
+ write_complete(res, hio);
}
}
+ if (refcnt_release(&hio->hio_countdown) > 0)
+ continue;
if (ISSYNCREQ(hio)) {
mtx_lock(&sync_lock);
SYNCREQDONE(hio);
@@ -1581,10 +1605,8 @@ remote_send_thread(void *arg)
nv_add_uint64(nv, (uint64_t)ggio->gctl_seq, "seq");
nv_add_uint64(nv, offset, "offset");
nv_add_uint64(nv, length, "length");
- if (hio->hio_replication == HAST_REPLICATION_MEMSYNC &&
- ggio->gctl_cmd == BIO_WRITE && !ISSYNCREQ(hio)) {
+ if (ISMEMSYNCWRITE(hio))
nv_add_uint8(nv, 1, "memsync");
- }
if (nv_error(nv) != 0) {
hio->hio_errors[ncomp] = nv_error(nv);
pjdlog_debug(2,
@@ -1616,6 +1638,7 @@ remote_send_thread(void *arg)
mtx_lock(&hio_recv_list_lock[ncomp]);
wakeup = TAILQ_EMPTY(&hio_recv_list[ncomp]);
TAILQ_INSERT_TAIL(&hio_recv_list[ncomp], hio, hio_next[ncomp]);
+ hio_recv_list_size[ncomp]++;
mtx_unlock(&hio_recv_list_lock[ncomp]);
if (hast_proto_send(res, res->hr_remoteout, nv, data,
data != NULL ? length : 0) == -1) {
@@ -1627,17 +1650,9 @@ remote_send_thread(void *arg)
"Unable to send request (%s): ",
strerror(hio->hio_errors[ncomp]));
remote_close(res, ncomp);
- /*
- * Take request back from the receive queue and move
- * it immediately to the done queue.
- */
- mtx_lock(&hio_recv_list_lock[ncomp]);
- TAILQ_REMOVE(&hio_recv_list[ncomp], hio,
- hio_next[ncomp]);
- mtx_unlock(&hio_recv_list_lock[ncomp]);
- goto done_queue;
+ } else {
+ rw_unlock(&hio_remote_lock[ncomp]);
}
- rw_unlock(&hio_remote_lock[ncomp]);
nv_free(nv);
if (wakeup)
cv_signal(&hio_recv_list_cond[ncomp]);
@@ -1661,8 +1676,12 @@ done_queue:
} else {
mtx_unlock(&res->hr_amp_lock);
}
- if (hio->hio_replication == HAST_REPLICATION_MEMSYNC)
- (void)refcnt_release(&hio->hio_countdown);
+ if (ISMEMSYNCWRITE(hio)) {
+ if (refcnt_release(&hio->hio_writecount) == 0) {
+ if (hio->hio_errors[0] == 0)
+ write_complete(res, hio);
+ }
+ }
}
if (refcnt_release(&hio->hio_countdown) > 0)
continue;
@@ -1718,7 +1737,9 @@ remote_recv_thread(void *arg)
PJDLOG_ASSERT(hio != NULL);
TAILQ_REMOVE(&hio_recv_list[ncomp], hio,
hio_next[ncomp]);
+ hio_recv_list_size[ncomp]--;
mtx_unlock(&hio_recv_list_lock[ncomp]);
+ hio->hio_errors[ncomp] = ENOTCONN;
goto done_queue;
}
if (hast_proto_recv_hdr(res->hr_remotein, &nv) == -1) {
@@ -1741,6 +1762,7 @@ remote_recv_thread(void *arg)
if (hio->hio_ggio.gctl_seq == seq) {
TAILQ_REMOVE(&hio_recv_list[ncomp], hio,
hio_next[ncomp]);
+ hio_recv_list_size[ncomp]--;
break;
}
}
@@ -1791,80 +1813,34 @@ remote_recv_thread(void *arg)
hio->hio_errors[ncomp] = 0;
nv_free(nv);
done_queue:
- if (hio->hio_replication != HAST_REPLICATION_MEMSYNC ||
- hio->hio_ggio.gctl_cmd != BIO_WRITE || ISSYNCREQ(hio)) {
- if (refcnt_release(&hio->hio_countdown) > 0)
- continue;
- } else {
- /*
- * Depending on hio_countdown value, requests finished
- * in the following order:
- *
- * 0: local write, remote memsync, remote final
- * or
- * 0: remote memsync, local write, remote final
- *
- * 1: local write, remote memsync, (remote final)
- * or
- * 1: remote memsync, remote final, (local write)
- *
- * 2: remote memsync, (local write), (remote final)
- * or
- * 2: remote memsync, (remote final), (local write)
- */
- switch (refcnt_release(&hio->hio_countdown)) {
- case 0:
- /*
- * Remote final reply arrived.
- */
- PJDLOG_ASSERT(!memsyncack);
- break;
- case 1:
- if (memsyncack) {
- /*
- * Local request already finished, so we
- * can complete the write.
- */
+ if (ISMEMSYNCWRITE(hio)) {
+ if (!hio->hio_memsyncacked) {
+ PJDLOG_ASSERT(memsyncack ||
+ hio->hio_errors[ncomp] != 0);
+ /* Remote ack arrived. */
+ if (refcnt_release(&hio->hio_writecount) == 0) {
if (hio->hio_errors[0] == 0)
write_complete(res, hio);
- /*
- * We still need to wait for final
- * remote reply.
- */
+ }
+ hio->hio_memsyncacked = true;
+ if (hio->hio_errors[ncomp] == 0) {
pjdlog_debug(2,
- "remote_recv: (%p) Moving request back to the recv queue.",
- hio);
+ "remote_recv: (%p) Moving request "
+ "back to the recv queue.", hio);
mtx_lock(&hio_recv_list_lock[ncomp]);
TAILQ_INSERT_TAIL(&hio_recv_list[ncomp],
hio, hio_next[ncomp]);
+ hio_recv_list_size[ncomp]++;
mtx_unlock(&hio_recv_list_lock[ncomp]);
- } else {
- /*
- * Remote final reply arrived before
- * local write finished.
- * Nothing to do in such case.
- */
+ continue;
}
- continue;
- case 2:
- /*
- * We received remote memsync reply even before
- * local write finished.
- */
- PJDLOG_ASSERT(memsyncack);
-
- pjdlog_debug(2,
- "remote_recv: (%p) Moving request back to the recv queue.",
- hio);
- mtx_lock(&hio_recv_list_lock[ncomp]);
- TAILQ_INSERT_TAIL(&hio_recv_list[ncomp], hio,
- hio_next[ncomp]);
- mtx_unlock(&hio_recv_list_lock[ncomp]);
- continue;
- default:
- PJDLOG_ABORT("Invalid hio_countdown.");
+ } else {
+ PJDLOG_ASSERT(!memsyncack);
+ /* Remote final reply arrived. */
}
}
+ if (refcnt_release(&hio->hio_countdown) > 0)
+ continue;
if (ISSYNCREQ(hio)) {
mtx_lock(&sync_lock);
SYNCREQDONE(hio);
Modified: stable/9/sbin/hastd/proto.c
==============================================================================
--- stable/9/sbin/hastd/proto.c Sat Dec 28 19:21:22 2013 (r260006)
+++ stable/9/sbin/hastd/proto.c Sat Dec 28 19:22:23 2013 (r260007)
@@ -298,8 +298,8 @@ proto_connection_send(const struct proto
protoname = mconn->pc_proto->prt_name;
PJDLOG_ASSERT(protoname != NULL);
- ret = conn->pc_proto->prt_send(conn->pc_ctx, protoname,
- strlen(protoname) + 1, fd);
+ ret = conn->pc_proto->prt_send(conn->pc_ctx,
+ (const unsigned char *)protoname, strlen(protoname) + 1, fd);
proto_close(mconn);
if (ret != 0) {
errno = ret;
@@ -325,7 +325,7 @@ proto_connection_recv(const struct proto
bzero(protoname, sizeof(protoname));
- ret = conn->pc_proto->prt_recv(conn->pc_ctx, protoname,
+ ret = conn->pc_proto->prt_recv(conn->pc_ctx, (unsigned char *)protoname,
sizeof(protoname) - 1, &fd);
if (ret != 0) {
errno = ret;
Modified: stable/9/sbin/hastd/secondary.c
==============================================================================
--- stable/9/sbin/hastd/secondary.c Sat Dec 28 19:21:22 2013 (r260006)
+++ stable/9/sbin/hastd/secondary.c Sat Dec 28 19:22:23 2013 (r260007)
@@ -82,18 +82,21 @@ static struct hast_resource *gres;
* until some in-progress requests are freed.
*/
static TAILQ_HEAD(, hio) hio_free_list;
+static size_t hio_free_list_size;
static pthread_mutex_t hio_free_list_lock;
static pthread_cond_t hio_free_list_cond;
/*
* Disk thread (the one that does I/O requests) takes requests from this list.
*/
static TAILQ_HEAD(, hio) hio_disk_list;
+static size_t hio_disk_list_size;
static pthread_mutex_t hio_disk_list_lock;
static pthread_cond_t hio_disk_list_cond;
/*
* Thread that sends requests back to primary takes requests from this list.
*/
static TAILQ_HEAD(, hio) hio_send_list;
+static size_t hio_send_list_size;
static pthread_mutex_t hio_send_list_lock;
static pthread_cond_t hio_send_list_cond;
@@ -107,14 +110,12 @@ static void *disk_thread(void *arg);
static void *send_thread(void *arg);
#define QUEUE_INSERT(name, hio) do { \
- bool _wakeup; \
- \
mtx_lock(&hio_##name##_list_lock); \
- _wakeup = TAILQ_EMPTY(&hio_##name##_list); \
+ if (TAILQ_EMPTY(&hio_##name##_list)) \
+ cv_broadcast(&hio_##name##_list_cond); \
TAILQ_INSERT_TAIL(&hio_##name##_list, (hio), hio_next); \
+ hio_##name##_list_size++; \
mtx_unlock(&hio_##name##_list_lock); \
- if (_wakeup) \
- cv_broadcast(&hio_##name##_list_cond); \
} while (0)
#define QUEUE_TAKE(name, hio) do { \
mtx_lock(&hio_##name##_list_lock); \
@@ -122,11 +123,22 @@ static void *send_thread(void *arg);
cv_wait(&hio_##name##_list_cond, \
&hio_##name##_list_lock); \
} \
+ PJDLOG_ASSERT(hio_##name##_list_size != 0); \
+ hio_##name##_list_size--; \
TAILQ_REMOVE(&hio_##name##_list, (hio), hio_next); \
mtx_unlock(&hio_##name##_list_lock); \
} while (0)
static void
+output_status_aux(struct nv *nvout)
+{
+
+ nv_add_uint64(nvout, (uint64_t)hio_free_list_size, "idle_queue_size");
+ nv_add_uint64(nvout, (uint64_t)hio_disk_list_size, "local_queue_size");
+ nv_add_uint64(nvout, (uint64_t)hio_send_list_size, "send_queue_size");
+}
+
+static void
hio_clear(struct hio *hio)
{
@@ -190,6 +202,7 @@ init_environment(void)
}
hio_clear(hio);
TAILQ_INSERT_HEAD(&hio_free_list, hio, hio_next);
+ hio_free_list_size++;
}
}
@@ -441,6 +454,7 @@ hastd_secondary(struct hast_resource *re
}
gres = res;
+ res->output_status_aux = output_status_aux;
mode = pjdlog_mode_get();
debuglevel = pjdlog_debug_get();
More information about the svn-src-stable-9
mailing list