From nobody Tue Aug 06 04:06:36 2024 X-Original-To: dev-commits-src-main@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 4WdKW11fMQz5SbW4; Tue, 06 Aug 2024 04:06:37 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R11" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4WdKW10n2Lz4kVn; Tue, 6 Aug 2024 04:06:37 +0000 (UTC) (envelope-from git@FreeBSD.org) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1722917197; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=Q+/uv1VbExL7qd7DIoKDJCsLogqdm70aWbF9tDhfqcY=; b=KvcDYoEWICzylHfaKxMUKPgs706d+eoR58JLtOWnN5zS/KPo5TQd51L8ZDJLA/zyuocOlu JgX/R6CJ3KmqZhRsfsg1HlNhtCk77bPqmH4iXR3fBeMK0W7e1JEfsXdP/ZG/fHxoNVkxhR NH9q93dGtjQ6l6r6xKlJo8iL8Nuu6RNBGRwJJikBSKmxOMAPXDsoLQVn2OwMabnxY5wfgG ujv4SUKv9de8aN9IoiCRHE2JjYn2P19XJsaY8iJ9RqWr7qXmxGoPkVPgW8U21vPAXfJN2J ajXvyTonndCqDNmuDViUTUX3g3zbPTnvTUqQ/MRIiVsDD4nXvfLGQqphx6Mq0w== ARC-Seal: i=1; s=dkim; d=freebsd.org; t=1722917197; a=rsa-sha256; cv=none; b=J36Hye4Mh18gfwpXqAQi+EV2p7dvlWB4suNbzQ97fg8PDfnW40WOXVHt+XcBZAFNYuLt0q C9h04T2B6B9MPiYpSctDuAKMFHDHeR2i/aOdzVG8gRJN4J0ar6y5E+MrnBi2+FC1tACCaZ LvtoM0kEVkZ/ZnyQRf2osqSZgJakI9q2qc8lSAZZl2DVI9Q7XrPHidKhQ34vIUJFji2DRP smqoQG++RaqN2HEOsDUMG7NtvCpohpnDMlYn0Cu9tpYuUn9dNPFWiQJeA7/YusizQdeT+g 9+qILQ0fUhXweCBvc7aVIslsEJBfC7t4UDb3SDUpONoOJAK74+ffkonnkPJWLQ== ARC-Authentication-Results: i=1; mx1.freebsd.org; none ARC-Message-Signature: i=1; 
a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1722917197; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=Q+/uv1VbExL7qd7DIoKDJCsLogqdm70aWbF9tDhfqcY=; b=UtvlaKfvCpepoc7SjAJGqc6oiGJkQTUM6nLf/itqquaGpj7NEb5YNDahCCvGM0FWzxM6AH hhWQENj5OclbNx40ZGPcPjgcILnpNyb/qN77eW2tlWsUXzs8njm9T5yNuKhr3o0nAbmuGh x63IGJ0h4NTflkKIuiUsX/Twjuxda0RXDZSiA9ZM7YrCoBiO4XdeR4CZBcb89U+0+GFPck 4Fpqo1rxQelAuXMJZSj2v4P5zllT4A9lrn5CMR+xvPE8Vcm8tR51+VXkazboxAcvii83ZH dU87pe/9OkIOcl+VWQF1KKH7dr6yk1O2InWknvsF+rvk14I51GmAdRWqCSGSeg== Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 4WdKW06k3Zzs99; Tue, 6 Aug 2024 04:06:36 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.18.1/8.18.1) with ESMTP id 47646a1K004610; Tue, 6 Aug 2024 04:06:36 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.18.1/8.18.1/Submit) id 47646aWe004607; Tue, 6 Aug 2024 04:06:36 GMT (envelope-from git) Date: Tue, 6 Aug 2024 04:06:36 GMT Message-Id: <202408060406.47646aWe004607@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org From: Konstantin Belousov Subject: git: 5badbeeaf061 - main - Re-implement rangelocks part 2 List-Id: Commit messages for the main branch of the src repository List-Archive: https://lists.freebsd.org/archives/dev-commits-src-main List-Help: List-Post: List-Subscribe: List-Unsubscribe: X-BeenThere: dev-commits-src-main@freebsd.org Sender: owner-dev-commits-src-main@FreeBSD.org MIME-Version: 1.0 
Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: kib X-Git-Repository: src X-Git-Refname: refs/heads/main X-Git-Reftype: branch X-Git-Commit: 5badbeeaf0614568d37b2466d0f52676ff08a049 Auto-Submitted: auto-generated The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=5badbeeaf0614568d37b2466d0f52676ff08a049 commit 5badbeeaf0614568d37b2466d0f52676ff08a049 Author: Konstantin Belousov AuthorDate: 2023-08-18 17:32:01 +0000 Commit: Konstantin Belousov CommitDate: 2024-08-06 04:05:58 +0000 Re-implement rangelocks part 2 Allow read locks to overlap. Reviewed by: markj, Olivier Certner Tested by: pho Sponsored by: The FreeBSD Foundation Differential revision: https://reviews.freebsd.org/D41787 --- sys/kern/kern_rangelock.c | 232 +++++++++++++++++++++++++++++++++------------- 1 file changed, 169 insertions(+), 63 deletions(-) diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c index 2ed26db49f19..9c6b7fb871f9 100644 --- a/sys/kern/kern_rangelock.c +++ b/sys/kern/kern_rangelock.c @@ -121,11 +121,28 @@ rl_e_is_marked(const struct rl_q_entry *e) return (((uintptr_t)e & 1) != 0); } +static struct rl_q_entry * +rl_e_unmark_unchecked(const struct rl_q_entry *e) +{ + return ((struct rl_q_entry *)((uintptr_t)e & ~1)); +} + static struct rl_q_entry * rl_e_unmark(const struct rl_q_entry *e) { MPASS(rl_e_is_marked(e)); - return ((struct rl_q_entry *)((uintptr_t)e & ~1)); + return (rl_e_unmark_unchecked(e)); +} + +static void +rl_e_mark(struct rl_q_entry *e) +{ +#if defined(INVARIANTS) && defined(__LP64__) + int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0); + MPASS(r == 0); +#else + atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1); +#endif } static struct rl_q_entry * @@ -140,42 +157,49 @@ rl_e_is_rlock(const struct rl_q_entry *e) return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ); } -void -rangelock_unlock(struct rangelock *lock, void *cookie) +static void 
+rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e) { - struct rl_q_entry *e; - - e = cookie; MPASS(lock != NULL && e != NULL); MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next))); MPASS(e->rl_q_owner == curthread); - sleepq_lock(&lock->sleepers); -#ifdef INVARIANTS - int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0); - MPASS(r == 0); -#else - atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1); -#endif + rl_e_mark(e); lock->sleepers = false; sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0); +} + +void +rangelock_unlock(struct rangelock *lock, void *cookie) +{ + sleepq_lock(&lock->sleepers); + rangelock_unlock_int(lock, cookie); sleepq_release(&lock->sleepers); } /* - * result: -1 if e1 before e2 - * 1 if e1 after e2 - * 0 if e1 and e2 overlap + * result: -1 if e1 before e2, or both locks are readers and e1 + * starts before or at e2 + * 1 if e1 after e2, or both locks are readers and e1 + * starts after e2 + * 0 if e1 and e2 overlap and at least one lock is writer */ static int rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2) { + bool rds; + if (e1 == NULL) return (1); - if (e1->rl_q_start >= e2->rl_q_end) - return (1); if (e2->rl_q_start >= e1->rl_q_end) return (-1); + rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2); + if (e2->rl_q_start >= e1->rl_q_start && rds) + return (-1); + if (e1->rl_q_start >= e2->rl_q_end) + return (1); + if (e1->rl_q_start >= e2->rl_q_start && rds) + return (1); return (0); } @@ -199,7 +223,95 @@ rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old, (uintptr_t)new) != 0); } -static bool +enum RL_INSERT_RES { + RL_TRYLOCK_FAILED, + RL_LOCK_SUCCESS, + RL_LOCK_RETRY, +}; + +static enum RL_INSERT_RES +rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock, + struct rl_q_entry **free) +{ + struct rl_q_entry *cur, *next, **prev; + + prev = &e->rl_q_next; + cur = rl_q_load(prev); + MPASS(!rl_e_is_marked(cur)); /* nobody can unlock e yet */ + for (;;) { + if (cur == NULL || 
cur->rl_q_start >= e->rl_q_end) + return (RL_LOCK_SUCCESS); + next = rl_q_load(&cur->rl_q_next); + if (rl_e_is_marked(next)) { + next = rl_e_unmark(next); + if (rl_q_cas(prev, cur, next)) { + cur->rl_q_free = *free; + *free = cur; + } + cur = next; + continue; + } + if (rl_e_is_rlock(cur)) { + prev = &cur->rl_q_next; + cur = rl_e_unmark_unchecked(rl_q_load(prev)); + continue; + } + if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { + sleepq_lock(&lock->sleepers); + if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) { + sleepq_release(&lock->sleepers); + continue; + } + rangelock_unlock_int(lock, e); + if (trylock) { + sleepq_release(&lock->sleepers); + return (RL_TRYLOCK_FAILED); + } + rl_insert_sleep(lock); + return (RL_LOCK_RETRY); + } + } +} + +static enum RL_INSERT_RES +rl_w_validate(struct rangelock *lock, struct rl_q_entry *e, + bool trylock, struct rl_q_entry **free) +{ + struct rl_q_entry *cur, *next, **prev; + + prev = &lock->head; + cur = rl_q_load(prev); + MPASS(!rl_e_is_marked(cur)); /* head is not marked */ + for (;;) { + if (cur == e) + return (RL_LOCK_SUCCESS); + next = rl_q_load(&cur->rl_q_next); + if (rl_e_is_marked(next)) { + next = rl_e_unmark(next); + if (rl_q_cas(prev, cur, next)) { + cur->rl_q_free = *free; + *free = cur; + } + cur = next; + continue; + } + if (cur->rl_q_end <= e->rl_q_start) { + prev = &cur->rl_q_next; + cur = rl_e_unmark_unchecked(rl_q_load(prev)); + continue; + } + sleepq_lock(&lock->sleepers); + rangelock_unlock_int(lock, e); + if (trylock) { + sleepq_release(&lock->sleepers); + return (RL_TRYLOCK_FAILED); + } + rl_insert_sleep(lock); + return (RL_LOCK_RETRY); + } +} + +static enum RL_INSERT_RES rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock, struct rl_q_entry **free) { @@ -208,14 +320,15 @@ rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock, again: prev = &lock->head; - if (rl_q_load(prev) == NULL && rl_q_cas(prev, NULL, e)) - return (true); - - for (cur = rl_q_load(prev);;) { - if 
(rl_e_is_marked(cur)) - goto again; + cur = rl_q_load(prev); + if (cur == NULL && rl_q_cas(prev, NULL, e)) + return (RL_LOCK_SUCCESS); + for (;;) { if (cur != NULL) { + if (rl_e_is_marked(cur)) + goto again; + next = rl_q_load(&cur->rl_q_next); if (rl_e_is_marked(next)) { next = rl_e_unmark(next); @@ -244,7 +357,7 @@ again: } if (trylock) { sleepq_release(&lock->sleepers); - return (false); + return (RL_TRYLOCK_FAILED); } rl_insert_sleep(lock); /* e is still valid */ @@ -253,7 +366,9 @@ again: e->rl_q_next = cur; if (rl_q_cas(prev, cur, e)) { atomic_thread_fence_acq(); - return (true); + return (rl_e_is_rlock(e) ? + rl_r_validate(lock, e, trylock, free) : + rl_w_validate(lock, e, trylock, free)); } /* Reset rl_q_next in case we hit fast path. */ e->rl_q_next = NULL; @@ -263,27 +378,30 @@ again: } static struct rl_q_entry * -rangelock_lock_int(struct rangelock *lock, struct rl_q_entry *e, - bool trylock) +rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start, + vm_ooffset_t end, int locktype) { - struct rl_q_entry *free, *x, *xp; - bool res; - - free = NULL; - smr_enter(rl_smr); - res = rl_insert(lock, e, trylock, &free); - smr_exit(rl_smr); - MPASS(trylock || res); - if (!res) { - e->rl_q_free = free; - free = e; - e = NULL; - } - for (x = free; x != NULL; x = xp) { - MPASS(!rl_e_is_marked(x)); - xp = x->rl_q_free; - MPASS(!rl_e_is_marked(xp)); - uma_zfree_smr(rl_entry_zone, x); + struct rl_q_entry *e, *free, *x, *xp; + enum RL_INSERT_RES res; + + for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) { + free = NULL; + e = rlqentry_alloc(start, end, locktype); + smr_enter(rl_smr); + res = rl_insert(lock, e, trylock, &free); + smr_exit(rl_smr); + if (res == RL_TRYLOCK_FAILED) { + MPASS(trylock); + e->rl_q_free = free; + free = e; + e = NULL; + } + for (x = free; x != NULL; x = xp) { + MPASS(!rl_e_is_marked(x)); + xp = x->rl_q_free; + MPASS(!rl_e_is_marked(xp)); + uma_zfree_smr(rl_entry_zone, x); + } } return (e); } @@ -291,37 +409,25 @@ 
rangelock_lock_int(struct rangelock *lock, struct rl_q_entry *e, void * rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) { - struct rl_q_entry *e; - - e = rlqentry_alloc(start, end, RL_LOCK_READ); - return (rangelock_lock_int(lock, e, false)); + return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ)); } void * rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) { - struct rl_q_entry *e; - - e = rlqentry_alloc(start, end, RL_LOCK_READ); - return (rangelock_lock_int(lock, e, true)); + return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ)); } void * rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) { - struct rl_q_entry *e; - - e = rlqentry_alloc(start, end, RL_LOCK_WRITE); - return (rangelock_lock_int(lock, e, true)); + return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE)); } void * rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) { - struct rl_q_entry *e; - - e = rlqentry_alloc(start, end, RL_LOCK_WRITE); - return (rangelock_lock_int(lock, e, true)); + return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE)); } #ifdef INVARIANT_SUPPORT