svn commit: r363503 - stable/12/sys/kern
Mateusz Guzik
mjg at FreeBSD.org
Sat Jul 25 00:01:58 UTC 2020
Author: mjg
Date: Sat Jul 25 00:01:57 2020
New Revision: 363503
URL: https://svnweb.freebsd.org/changeset/base/363503
Log:
MFC r347503,r347505,r347510,r352177,r352178,r352183,r352612,r352617,
r352631,r352810,r352811,r352812,r352813,r354893,r355124,r355125,
r356880,r356883,r356915
cache: push sdt probes in cache_zap_locked to code doing the work
cache: bump numcache on entry, while here fix lnumcache type
cache: fix a brainfart in r347505
cache: assorted cleanups
cache: change the formula for calculating lock array sizes
cache: avoid excessive relocking on entry removal during lookup
cache: jump in negative success instead of positive
cache: count evictions of negative entries
cache: tidy up handling of negative entries
cache: stop recalculating upper limit each time a new entry is added
cache: make negative list shrinking a little bit concurrent
cache: stop requeuing negative entries on the hot list
cache: decrease ncnegfactor to 5
cache: minor stat cleanup
cache: fix numcache accounting on entry
cache: stop reusing .. entries on enter
cache: convert numcachehv to counter(9) on 64-bit platforms
cache: counter_u64_add_protected -> counter_u64_add
cache: make numcachehv use counter(9) on all archs
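For context on the numcache accounting changes above (r347505, r352612, r356880): the entry count is now bumped with a single atomic fetch-and-add before the new entry is committed, and rolled back if the precomputed limit (ncsize) would be exceeded, instead of reading numcache and recomputing desiredvnodes * ncsizefactor on every insertion. Below is a minimal userspace sketch of that pattern using C11 atomics rather than the kernel's atomic(9) primitives; the function name and types are illustrative, not taken from the patch.

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_long numcache;    /* current number of entries */
    static long ncsize;             /* limit computed once at init/resize */

    /*
     * Reserve a slot for a new cache entry. Bump the counter first and
     * back the increment out if the precomputed limit would be exceeded,
     * so the common path is one atomic op with no recomputation.
     */
    static bool
    cache_reserve_entry(void)
    {
            long lnumcache;

            lnumcache = atomic_fetch_add(&numcache, 1) + 1;
            if (lnumcache >= ncsize) {
                    atomic_fetch_add(&numcache, -1);
                    return (false);     /* over limit, caller skips caching */
            }
            return (true);
    }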
Modified:
stable/12/sys/kern/vfs_cache.c
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/sys/kern/vfs_cache.c
==============================================================================
--- stable/12/sys/kern/vfs_cache.c Fri Jul 24 23:51:08 2020 (r363502)
+++ stable/12/sys/kern/vfs_cache.c Sat Jul 25 00:01:57 2020 (r363503)
@@ -91,10 +91,10 @@ SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done
SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *");
SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *",
"struct vnode *");
-SDT_PROBE_DEFINE3(vfs, namecache, zap_negative, done, "struct vnode *",
- "char *", "int");
-SDT_PROBE_DEFINE3(vfs, namecache, shrink_negative, done, "struct vnode *",
- "char *", "int");
+SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *",
+ "char *");
+SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *",
+ "char *");
/*
* This structure describes the elements in the cache of recent
@@ -108,7 +108,6 @@ struct namecache {
struct vnode *nc_dvp; /* vnode of parent of name */
union {
struct vnode *nu_vp; /* vnode the name refers to */
- u_int nu_neghits; /* negative entry hits */
} n_un;
u_char nc_flag; /* flag bits */
u_char nc_nlen; /* length of name */
@@ -131,7 +130,6 @@ struct namecache_ts {
};
#define nc_vp n_un.nu_vp
-#define nc_neghits n_un.nu_neghits
/*
* Flags in namecache.nc_flag
@@ -201,31 +199,22 @@ static __read_mostly LIST_HEAD(nchashhead, namecache)
static u_long __read_mostly nchash; /* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
"Size of namecache hash table");
-static u_long __read_mostly ncnegfactor = 12; /* ratio of negative entries */
+static u_long __read_mostly ncnegfactor = 5; /* ratio of negative entries */
SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0,
"Ratio of negative namecache entries");
static u_long __exclusive_cache_line numneg; /* number of negative entries allocated */
-SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0,
- "Number of negative entries in namecache");
static u_long __exclusive_cache_line numcache;/* number of cache entries allocated */
-SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0,
- "Number of namecache entries");
-static u_long __exclusive_cache_line numcachehv;/* number of cache entries with vnodes held */
-SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0,
- "Number of namecache entries with vnodes held");
-u_int __read_mostly ncsizefactor = 2;
+u_int ncsizefactor = 2;
SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,
"Size factor for namecache");
static u_int __read_mostly ncpurgeminvnodes;
SYSCTL_UINT(_vfs, OID_AUTO, ncpurgeminvnodes, CTLFLAG_RW, &ncpurgeminvnodes, 0,
"Number of vnodes below which purgevfs ignores the request");
-static u_int __read_mostly ncneghitsrequeue = 8;
-SYSCTL_UINT(_vfs, OID_AUTO, ncneghitsrequeue, CTLFLAG_RW, &ncneghitsrequeue, 0,
- "Number of hits to requeue a negative entry in the LRU list");
+static u_int __read_mostly ncsize; /* the size as computed on creation or resizing */
struct nchstats nchstats; /* cache effectiveness statistics */
-static struct mtx ncneg_shrink_lock;
+static struct mtx __exclusive_cache_line ncneg_shrink_lock;
static int shrink_list_turn;
struct neglist {
@@ -235,6 +224,7 @@ struct neglist {
static struct neglist __read_mostly *neglists;
static struct neglist ncneg_hot;
+static u_long numhotneg;
#define numneglists (ncneghash + 1)
static u_int __read_mostly ncneghash;
@@ -358,6 +348,7 @@ static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW,
SYSCTL_COUNTER_U64(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, descr);
STATNODE_ULONG(numneg, "Number of negative cache entries");
STATNODE_ULONG(numcache, "Number of cache entries");
+STATNODE_COUNTER(numcachehv, "Number of namecache entries with vnodes held");
STATNODE_COUNTER(numcalls, "Number of cache lookups");
STATNODE_COUNTER(dothits, "Number of '.' hits");
STATNODE_COUNTER(dotdothits, "Number of '..' hits");
@@ -377,11 +368,20 @@ STATNODE_COUNTER(numfullpathfail2,
"Number of fullpath search errors (VOP_VPTOCNP failures)");
STATNODE_COUNTER(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
STATNODE_COUNTER(numfullpathfound, "Number of successful fullpath calls");
+STATNODE_COUNTER(zap_and_exit_bucket_relock_success,
+ "Number of successful removals after relocking");
static long zap_and_exit_bucket_fail; STATNODE_ULONG(zap_and_exit_bucket_fail,
"Number of times zap_and_exit failed to lock");
+static long zap_and_exit_bucket_fail2; STATNODE_ULONG(zap_and_exit_bucket_fail2,
+ "Number of times zap_and_exit failed to lock");
static long cache_lock_vnodes_cel_3_failures;
STATNODE_ULONG(cache_lock_vnodes_cel_3_failures,
"Number of times 3-way vnode locking failed");
+STATNODE_ULONG(numhotneg, "Number of hot negative entries");
+STATNODE_COUNTER(numneg_evicted,
+ "Number of negative entries evicted when adding a new entry");
+STATNODE_COUNTER(shrinking_skipped,
+ "Number of times shrinking was already in progress");
static void cache_zap_locked(struct namecache *ncp, bool neg_locked);
static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
@@ -393,7 +393,7 @@ static int cache_yield;
SYSCTL_INT(_vfs_cache, OID_AUTO, yield, CTLFLAG_RD, &cache_yield, 0,
"Number of times cache called yield");
-static void
+static void __noinline
cache_maybe_yield(void)
{
@@ -452,12 +452,14 @@ cache_assert_bucket_locked(struct namecache *ncp, int
#define cache_assert_bucket_locked(x, y) do { } while (0)
#endif
-#define cache_sort(x, y) _cache_sort((void **)(x), (void **)(y))
+#define cache_sort_vnodes(x, y) _cache_sort_vnodes((void **)(x), (void **)(y))
static void
-_cache_sort(void **p1, void **p2)
+_cache_sort_vnodes(void **p1, void **p2)
{
void *tmp;
+ MPASS(*p1 != NULL || *p2 != NULL);
+
if (*p1 > *p2) {
tmp = *p2;
*p2 = *p1;
@@ -505,8 +507,7 @@ static int
cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
{
- cache_sort(&vlp1, &vlp2);
- MPASS(vlp2 != NULL);
+ cache_sort_vnodes(&vlp1, &vlp2);
if (vlp1 != NULL) {
if (!mtx_trylock(vlp1))
@@ -522,6 +523,19 @@ cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp
}
static void
+cache_lock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
+{
+
+ MPASS(vlp1 != NULL || vlp2 != NULL);
+ MPASS(vlp1 <= vlp2);
+
+ if (vlp1 != NULL)
+ mtx_lock(vlp1);
+ if (vlp2 != NULL)
+ mtx_lock(vlp2);
+}
+
+static void
cache_unlock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
{
@@ -650,8 +664,6 @@ SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE
*
* A variation of LRU scheme is used. New entries are hashed into one of
* numneglists cold lists. Entries get promoted to the hot list on first hit.
- * Partial LRU for the hot list is maintained by requeueing them every
- * ncneghitsrequeue hits.
*
* The shrinker will demote hot list head and evict from the cold list in a
* round-robin manner.
@@ -660,30 +672,15 @@ static void
cache_negative_hit(struct namecache *ncp)
{
struct neglist *neglist;
- u_int hits;
MPASS(ncp->nc_flag & NCF_NEGATIVE);
- hits = atomic_fetchadd_int(&ncp->nc_neghits, 1);
- if (ncp->nc_flag & NCF_HOTNEGATIVE) {
- if ((hits % ncneghitsrequeue) != 0)
- return;
- mtx_lock(&ncneg_hot.nl_lock);
- if (ncp->nc_flag & NCF_HOTNEGATIVE) {
- TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
- TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst);
- mtx_unlock(&ncneg_hot.nl_lock);
- return;
- }
- /*
- * The shrinker cleared the flag and removed the entry from
- * the hot list. Put it back.
- */
- } else {
- mtx_lock(&ncneg_hot.nl_lock);
- }
+ if (ncp->nc_flag & NCF_HOTNEGATIVE)
+ return;
neglist = NCP2NEGLIST(ncp);
+ mtx_lock(&ncneg_hot.nl_lock);
mtx_lock(&neglist->nl_lock);
if (!(ncp->nc_flag & NCF_HOTNEGATIVE)) {
+ numhotneg++;
TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst);
TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst);
ncp->nc_flag |= NCF_HOTNEGATIVE;
@@ -737,6 +734,7 @@ cache_negative_remove(struct namecache *ncp, bool neg_
if (ncp->nc_flag & NCF_HOTNEGATIVE) {
mtx_assert(&ncneg_hot.nl_lock, MA_OWNED);
TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
+ numhotneg--;
} else {
mtx_assert(&neglist->nl_lock, MA_OWNED);
TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst);
@@ -782,8 +780,11 @@ cache_negative_zap_one(void)
struct mtx *dvlp;
struct rwlock *blp;
- if (!mtx_trylock(&ncneg_shrink_lock))
+ if (mtx_owner(&ncneg_shrink_lock) != NULL ||
+ !mtx_trylock(&ncneg_shrink_lock)) {
+ counter_u64_add(shrinking_skipped, 1);
return;
+ }
mtx_lock(&ncneg_hot.nl_lock);
ncp = TAILQ_FIRST(&ncneg_hot.nl_list);
@@ -793,8 +794,10 @@ cache_negative_zap_one(void)
TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
TAILQ_INSERT_TAIL(&neglist->nl_list, ncp, nc_dst);
ncp->nc_flag &= ~NCF_HOTNEGATIVE;
+ numhotneg--;
mtx_unlock(&neglist->nl_lock);
}
+ mtx_unlock(&ncneg_hot.nl_lock);
cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist);
shrink_list_turn++;
@@ -802,16 +805,14 @@ cache_negative_zap_one(void)
shrink_list_turn = 0;
if (ncp == NULL && shrink_list_turn == 0)
cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist);
- if (ncp == NULL) {
- mtx_unlock(&ncneg_hot.nl_lock);
- goto out;
- }
+ mtx_unlock(&ncneg_shrink_lock);
+ if (ncp == NULL)
+ return;
MPASS(ncp->nc_flag & NCF_NEGATIVE);
dvlp = VP2VNODELOCK(ncp->nc_dvp);
blp = NCP2BUCKETLOCK(ncp);
mtx_unlock(&neglist->nl_lock);
- mtx_unlock(&ncneg_hot.nl_lock);
mtx_lock(dvlp);
rw_wlock(blp);
mtx_lock(&neglist->nl_lock);
@@ -819,18 +820,16 @@ cache_negative_zap_one(void)
if (ncp != ncp2 || dvlp != VP2VNODELOCK(ncp2->nc_dvp) ||
blp != NCP2BUCKETLOCK(ncp2) || !(ncp2->nc_flag & NCF_NEGATIVE)) {
ncp = NULL;
- goto out_unlock_all;
- }
- SDT_PROBE3(vfs, namecache, shrink_negative, done, ncp->nc_dvp,
- ncp->nc_name, ncp->nc_neghits);
+ } else {
+ SDT_PROBE2(vfs, namecache, shrink_negative, done, ncp->nc_dvp,
+ ncp->nc_name);
- cache_zap_locked(ncp, true);
-out_unlock_all:
+ cache_zap_locked(ncp, true);
+ counter_u64_add(numneg_evicted, 1);
+ }
mtx_unlock(&neglist->nl_lock);
rw_wunlock(blp);
mtx_unlock(dvlp);
-out:
- mtx_unlock(&ncneg_shrink_lock);
cache_free(ncp);
}
@@ -851,19 +850,16 @@ cache_zap_locked(struct namecache *ncp, bool neg_locke
CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp,
(ncp->nc_flag & NCF_NEGATIVE) ? NULL : ncp->nc_vp);
+ LIST_REMOVE(ncp, nc_hash);
if (!(ncp->nc_flag & NCF_NEGATIVE)) {
SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
ncp->nc_name, ncp->nc_vp);
- } else {
- SDT_PROBE3(vfs, namecache, zap_negative, done, ncp->nc_dvp,
- ncp->nc_name, ncp->nc_neghits);
- }
- LIST_REMOVE(ncp, nc_hash);
- if (!(ncp->nc_flag & NCF_NEGATIVE)) {
TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
if (ncp == ncp->nc_vp->v_cache_dd)
ncp->nc_vp->v_cache_dd = NULL;
} else {
+ SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp,
+ ncp->nc_name);
cache_negative_remove(ncp, neg_locked);
}
if (ncp->nc_flag & NCF_ISDOTDOT) {
@@ -873,7 +869,7 @@ cache_zap_locked(struct namecache *ncp, bool neg_locke
LIST_REMOVE(ncp, nc_src);
if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
ncp->nc_flag |= NCF_DVDROP;
- atomic_subtract_rel_long(&numcachehv, 1);
+ counter_u64_add(numcachehv, -1);
}
}
atomic_subtract_rel_long(&numcache, 1);
@@ -926,7 +922,7 @@ cache_zap_locked_vnode_kl2(struct namecache *ncp, stru
mtx_unlock(*vlpp);
*vlpp = NULL;
}
- cache_sort(&vlp1, &vlp2);
+ cache_sort_vnodes(&vlp1, &vlp2);
if (vlp1 == pvlp) {
mtx_lock(vlp2);
to_unlock = vlp2;
@@ -952,7 +948,7 @@ out_relock:
return (false);
}
-static int
+static int __noinline
cache_zap_locked_vnode(struct namecache *ncp, struct vnode *vp)
{
struct mtx *pvlp, *vlp1, *vlp2, *to_unlock;
@@ -971,7 +967,7 @@ cache_zap_locked_vnode(struct namecache *ncp, struct v
blp = NCP2BUCKETLOCK(ncp);
vlp1 = VP2VNODELOCK(ncp->nc_dvp);
vlp2 = VP2VNODELOCK(ncp->nc_vp);
- cache_sort(&vlp1, &vlp2);
+ cache_sort_vnodes(&vlp1, &vlp2);
if (vlp1 == pvlp) {
mtx_lock(vlp2);
to_unlock = vlp2;
@@ -991,10 +987,47 @@ out:
return (error);
}
+/*
+ * If trylocking failed we can get here. We know enough to take all needed locks
+ * in the right order and re-lookup the entry.
+ */
static int
-cache_zap_wlocked_bucket(struct namecache *ncp, struct rwlock *blp)
+cache_zap_unlocked_bucket(struct namecache *ncp, struct componentname *cnp,
+ struct vnode *dvp, struct mtx *dvlp, struct mtx *vlp, uint32_t hash,
+ struct rwlock *blp)
{
+ struct namecache *rncp;
+
+ cache_assert_bucket_locked(ncp, RA_UNLOCKED);
+
+ cache_sort_vnodes(&dvlp, &vlp);
+ cache_lock_vnodes(dvlp, vlp);
+ rw_wlock(blp);
+ LIST_FOREACH(rncp, (NCHHASH(hash)), nc_hash) {
+ if (rncp == ncp && rncp->nc_dvp == dvp &&
+ rncp->nc_nlen == cnp->cn_namelen &&
+ !bcmp(rncp->nc_name, cnp->cn_nameptr, rncp->nc_nlen))
+ break;
+ }
+ if (rncp != NULL) {
+ cache_zap_locked(rncp, false);
+ rw_wunlock(blp);
+ cache_unlock_vnodes(dvlp, vlp);
+ counter_u64_add(zap_and_exit_bucket_relock_success, 1);
+ return (0);
+ }
+
+ rw_wunlock(blp);
+ cache_unlock_vnodes(dvlp, vlp);
+ return (EAGAIN);
+}
+
+static int __noinline
+cache_zap_wlocked_bucket(struct namecache *ncp, struct componentname *cnp,
+ uint32_t hash, struct rwlock *blp)
+{
struct mtx *dvlp, *vlp;
+ struct vnode *dvp;
cache_assert_bucket_locked(ncp, RA_WLOCKED);
@@ -1009,14 +1042,17 @@ cache_zap_wlocked_bucket(struct namecache *ncp, struct
return (0);
}
+ dvp = ncp->nc_dvp;
rw_wunlock(blp);
- return (EAGAIN);
+ return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp, hash, blp));
}
-static int
-cache_zap_rlocked_bucket(struct namecache *ncp, struct rwlock *blp)
+static int __noinline
+cache_zap_rlocked_bucket(struct namecache *ncp, struct componentname *cnp,
+ uint32_t hash, struct rwlock *blp)
{
struct mtx *dvlp, *vlp;
+ struct vnode *dvp;
cache_assert_bucket_locked(ncp, RA_RLOCKED);
@@ -1033,8 +1069,9 @@ cache_zap_rlocked_bucket(struct namecache *ncp, struct
return (0);
}
+ dvp = ncp->nc_dvp;
rw_runlock(blp);
- return (EAGAIN);
+ return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp, hash, blp));
}
static int
@@ -1049,7 +1086,7 @@ cache_zap_wlocked_bucket_kl(struct namecache *ncp, str
vlp = NULL;
if (!(ncp->nc_flag & NCF_NEGATIVE))
vlp = VP2VNODELOCK(ncp->nc_vp);
- cache_sort(&dvlp, &vlp);
+ cache_sort_vnodes(&dvlp, &vlp);
if (*vlpp1 == dvlp && *vlpp2 == vlp) {
cache_zap_locked(ncp, false);
@@ -1196,14 +1233,13 @@ retry:
goto out_no_entry;
}
- counter_u64_add(numposzaps, 1);
-
- error = cache_zap_wlocked_bucket(ncp, blp);
- if (error != 0) {
+ error = cache_zap_wlocked_bucket(ncp, cnp, hash, blp);
+ if (__predict_false(error != 0)) {
zap_and_exit_bucket_fail++;
cache_maybe_yield();
goto retry;
}
+ counter_u64_add(numposzaps, 1);
cache_free(ncp);
return (0);
out_no_entry:
@@ -1323,7 +1359,7 @@ retry:
}
/* We failed to find an entry */
- if (ncp == NULL) {
+ if (__predict_false(ncp == NULL)) {
rw_runlock(blp);
SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
NULL);
@@ -1331,35 +1367,17 @@ retry:
return (0);
}
- /* We found a "positive" match, return the vnode */
- if (!(ncp->nc_flag & NCF_NEGATIVE)) {
- counter_u64_add(numposhits, 1);
- *vpp = ncp->nc_vp;
- CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
- dvp, cnp->cn_nameptr, *vpp, ncp);
- SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
- *vpp);
- cache_out_ts(ncp, tsp, ticksp);
- goto success;
- }
+ if (ncp->nc_flag & NCF_NEGATIVE)
+ goto negative_success;
-negative_success:
- /* We found a negative match, and want to create it, so purge */
- if (cnp->cn_nameiop == CREATE) {
- counter_u64_add(numnegzaps, 1);
- goto zap_and_exit;
- }
-
- counter_u64_add(numneghits, 1);
- cache_negative_hit(ncp);
- if (ncp->nc_flag & NCF_WHITE)
- cnp->cn_flags |= ISWHITEOUT;
- SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
- ncp->nc_name);
+ /* We found a "positive" match, return the vnode */
+ counter_u64_add(numposhits, 1);
+ *vpp = ncp->nc_vp;
+ CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
+ dvp, cnp->cn_nameptr, *vpp, ncp);
+ SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
+ *vpp);
cache_out_ts(ncp, tsp, ticksp);
- cache_lookup_unlock(blp, dvlp);
- return (ENOENT);
-
success:
/*
* On success we return a locked and ref'd vnode as per the lookup
@@ -1393,13 +1411,30 @@ success:
}
return (-1);
+negative_success:
+ /* We found a negative match, and want to create it, so purge */
+ if (cnp->cn_nameiop == CREATE) {
+ counter_u64_add(numnegzaps, 1);
+ goto zap_and_exit;
+ }
+
+ counter_u64_add(numneghits, 1);
+ cache_negative_hit(ncp);
+ if (ncp->nc_flag & NCF_WHITE)
+ cnp->cn_flags |= ISWHITEOUT;
+ SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
+ ncp->nc_name);
+ cache_out_ts(ncp, tsp, ticksp);
+ cache_lookup_unlock(blp, dvlp);
+ return (ENOENT);
+
zap_and_exit:
if (blp != NULL)
- error = cache_zap_rlocked_bucket(ncp, blp);
+ error = cache_zap_rlocked_bucket(ncp, cnp, hash, blp);
else
error = cache_zap_locked_vnode(ncp, dvp);
- if (error != 0) {
- zap_and_exit_bucket_fail++;
+ if (__predict_false(error != 0)) {
+ zap_and_exit_bucket_fail2++;
cache_maybe_yield();
goto retry;
}
@@ -1435,7 +1470,7 @@ cache_lock_vnodes_cel(struct celockstate *cel, struct
vlp1 = VP2VNODELOCK(vp);
vlp2 = VP2VNODELOCK(dvp);
- cache_sort(&vlp1, &vlp2);
+ cache_sort_vnodes(&vlp1, &vlp2);
if (vlp1 != NULL) {
mtx_lock(vlp1);
@@ -1505,7 +1540,7 @@ cache_lock_buckets_cel(struct celockstate *cel, struct
MPASS(cel->blp[0] == NULL);
MPASS(cel->blp[1] == NULL);
- cache_sort(&blp1, &blp2);
+ cache_sort_vnodes(&blp1, &blp2);
if (blp1 != NULL) {
rw_wlock(blp1);
@@ -1619,6 +1654,33 @@ cache_enter_unlock(struct celockstate *cel)
cache_unlock_vnodes_cel(cel);
}
+static void __noinline
+cache_enter_dotdot_prep(struct vnode *dvp, struct vnode *vp,
+ struct componentname *cnp)
+{
+ struct celockstate cel;
+ struct namecache *ncp;
+ uint32_t hash;
+ int len;
+
+ if (dvp->v_cache_dd == NULL)
+ return;
+ len = cnp->cn_namelen;
+ cache_celockstate_init(&cel);
+ hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
+ cache_enter_lock_dd(&cel, dvp, vp, hash);
+ ncp = dvp->v_cache_dd;
+ if (ncp != NULL && (ncp->nc_flag & NCF_ISDOTDOT)) {
+ KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent"));
+ cache_zap_locked(ncp, false);
+ } else {
+ ncp = NULL;
+ }
+ dvp->v_cache_dd = NULL;
+ cache_enter_unlock(&cel);
+ cache_free(ncp);
+}
+
/*
* Add an entry to the cache.
*/
@@ -1630,12 +1692,10 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp,
struct namecache *ncp, *n2, *ndd;
struct namecache_ts *ncp_ts, *n2_ts;
struct nchashhead *ncpp;
- struct neglist *neglist;
uint32_t hash;
int flag;
int len;
- bool neg_locked;
- int lnumcache;
+ u_long lnumcache;
CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
@@ -1646,69 +1706,28 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp,
if (__predict_false(!doingcache))
return;
+ flag = 0;
+ if (__predict_false(cnp->cn_nameptr[0] == '.')) {
+ if (cnp->cn_namelen == 1)
+ return;
+ if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
+ cache_enter_dotdot_prep(dvp, vp, cnp);
+ flag = NCF_ISDOTDOT;
+ }
+ }
+
/*
* Avoid blowout in namecache entries.
*/
- if (__predict_false(numcache >= desiredvnodes * ncsizefactor))
+ lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
+ if (__predict_false(lnumcache >= ncsize)) {
+ atomic_add_long(&numcache, -1);
return;
+ }
cache_celockstate_init(&cel);
ndd = NULL;
ncp_ts = NULL;
- flag = 0;
- if (cnp->cn_nameptr[0] == '.') {
- if (cnp->cn_namelen == 1)
- return;
- if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
- len = cnp->cn_namelen;
- hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
- cache_enter_lock_dd(&cel, dvp, vp, hash);
- /*
- * If dotdot entry already exists, just retarget it
- * to new parent vnode, otherwise continue with new
- * namecache entry allocation.
- */
- if ((ncp = dvp->v_cache_dd) != NULL &&
- ncp->nc_flag & NCF_ISDOTDOT) {
- KASSERT(ncp->nc_dvp == dvp,
- ("wrong isdotdot parent"));
- neg_locked = false;
- if (ncp->nc_flag & NCF_NEGATIVE || vp == NULL) {
- neglist = NCP2NEGLIST(ncp);
- mtx_lock(&ncneg_hot.nl_lock);
- mtx_lock(&neglist->nl_lock);
- neg_locked = true;
- }
- if (!(ncp->nc_flag & NCF_NEGATIVE)) {
- TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
- ncp, nc_dst);
- } else {
- cache_negative_remove(ncp, true);
- }
- if (vp != NULL) {
- TAILQ_INSERT_HEAD(&vp->v_cache_dst,
- ncp, nc_dst);
- ncp->nc_flag &= ~(NCF_NEGATIVE|NCF_HOTNEGATIVE);
- } else {
- ncp->nc_flag &= ~(NCF_HOTNEGATIVE);
- ncp->nc_flag |= NCF_NEGATIVE;
- cache_negative_insert(ncp, true);
- }
- if (neg_locked) {
- mtx_unlock(&neglist->nl_lock);
- mtx_unlock(&ncneg_hot.nl_lock);
- }
- ncp->nc_vp = vp;
- cache_enter_unlock(&cel);
- return;
- }
- dvp->v_cache_dd = NULL;
- cache_enter_unlock(&cel);
- cache_celockstate_init(&cel);
- SDT_PROBE3(vfs, namecache, enter, done, dvp, "..", vp);
- flag = NCF_ISDOTDOT;
- }
- }
/*
* Calculate the hash key and setup as much of the new
@@ -1800,7 +1819,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp,
if (flag != NCF_ISDOTDOT) {
if (LIST_EMPTY(&dvp->v_cache_src)) {
vhold(dvp);
- atomic_add_rel_long(&numcachehv, 1);
+ counter_u64_add(numcachehv, 1);
}
LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
}
@@ -1828,7 +1847,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp,
ncp->nc_name);
}
cache_enter_unlock(&cel);
- lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
if (numneg * ncnegfactor > lnumcache)
cache_negative_zap_one();
cache_free(ndd);
@@ -1875,20 +1893,23 @@ nchinit(void *dummy __unused)
NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache_ts),
UMA_ZONE_ZINIT);
+ ncsize = desiredvnodes * ncsizefactor;
nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
- ncbuckethash = cache_roundup_2(mp_ncpus * 64) - 1;
+ ncbuckethash = cache_roundup_2(mp_ncpus * mp_ncpus) - 1;
+ if (ncbuckethash < 7) /* arbitrarily chosen to avoid having one lock */
+ ncbuckethash = 7;
if (ncbuckethash > nchash)
ncbuckethash = nchash;
bucketlocks = malloc(sizeof(*bucketlocks) * numbucketlocks, M_VFSCACHE,
M_WAITOK | M_ZERO);
for (i = 0; i < numbucketlocks; i++)
rw_init_flags(&bucketlocks[i], "ncbuc", RW_DUPOK | RW_RECURSE);
- ncvnodehash = cache_roundup_2(mp_ncpus * 64) - 1;
+ ncvnodehash = ncbuckethash;
vnodelocks = malloc(sizeof(*vnodelocks) * numvnodelocks, M_VFSCACHE,
M_WAITOK | M_ZERO);
for (i = 0; i < numvnodelocks; i++)
mtx_init(&vnodelocks[i], "ncvn", NULL, MTX_DUPOK | MTX_RECURSE);
- ncpurgeminvnodes = numbucketlocks;
+ ncpurgeminvnodes = numbucketlocks * 2;
ncneghash = 3;
neglists = malloc(sizeof(*neglists) * numneglists, M_VFSCACHE,
@@ -1902,6 +1923,7 @@ nchinit(void *dummy __unused)
mtx_init(&ncneg_shrink_lock, "ncnegs", NULL, MTX_DEF);
+ numcachehv = counter_u64_alloc(M_WAITOK);
numcalls = counter_u64_alloc(M_WAITOK);
dothits = counter_u64_alloc(M_WAITOK);
dotdothits = counter_u64_alloc(M_WAITOK);
@@ -1917,6 +1939,9 @@ nchinit(void *dummy __unused)
numfullpathfail2 = counter_u64_alloc(M_WAITOK);
numfullpathfail4 = counter_u64_alloc(M_WAITOK);
numfullpathfound = counter_u64_alloc(M_WAITOK);
+ zap_and_exit_bucket_relock_success = counter_u64_alloc(M_WAITOK);
+ numneg_evicted = counter_u64_alloc(M_WAITOK);
+ shrinking_skipped = counter_u64_alloc(M_WAITOK);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
@@ -1927,8 +1952,10 @@ cache_changesize(int newmaxvnodes)
u_long new_nchash, old_nchash;
struct namecache *ncp;
uint32_t hash;
+ int newncsize;
int i;
+ newncsize = newmaxvnodes * ncsizefactor;
newmaxvnodes = cache_roundup_2(newmaxvnodes * 2);
if (newmaxvnodes < numbucketlocks)
newmaxvnodes = numbucketlocks;
@@ -1958,6 +1985,7 @@ cache_changesize(int newmaxvnodes)
LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
}
}
+ ncsize = newncsize;
cache_unlock_all_buckets();
cache_unlock_all_vnodes();
free(old_nchashtbl, M_VFSCACHE);
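The lock array sizing change (r352177) visible in nchinit() above replaces the old mp_ncpus * 64 heuristic with roughly ncpus squared, rounded up to a power of two and given a floor of 8 locks so small machines never end up with a single lock; the vnode lock array now reuses the same size. A hedged userspace sketch of that computation follows; cache_roundup_2 here is a stand-in written to match the intent of the kernel helper of the same name, and bucketlock_mask is an illustrative name, not a function in the patch.

    #include <stdint.h>

    /* Round up to the next power of 2 above val. */
    static uint32_t
    cache_roundup_2(uint32_t val)
    {
            uint32_t res;

            for (res = 1; res <= val; res <<= 1)
                    continue;
            return (res);
    }

    /*
     * Compute the hash mask for the bucket lock array: roughly ncpus^2
     * entries, as a power-of-2 mask, with a floor of 7 (8 locks) and
     * capped at the namecache hash size.
     */
    static uint32_t
    bucketlock_mask(uint32_t ncpus, uint32_t nchash)
    {
            uint32_t mask;

            mask = cache_roundup_2(ncpus * ncpus) - 1;
            if (mask < 7)       /* avoid ending up with one lock */
                    mask = 7;
            if (mask > nchash)
                    mask = nchash;
            return (mask);
    }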