PERFORCE change 179615 for review
Gleb Kurtsou
gk at FreeBSD.org
Mon Jun 14 19:25:55 UTC 2010
http://p4web.freebsd.org/@@179615?ac=10
Change 179615 by gk at gk_h1 on 2010/06/14 19:25:33
Fix bugs, add missing bits to make cache operations work for tmpfs.
Affected files ...
.. //depot/projects/soc2010/gk_namecache/sys/kern/subr_witness.c#2 edit
.. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#2 edit
.. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_subr.c#3 edit
.. //depot/projects/soc2010/gk_namecache/sys/sys/dircache.h#2 edit
Differences ...
==== //depot/projects/soc2010/gk_namecache/sys/kern/subr_witness.c#2 (text+ko) ====
@@ -614,6 +614,19 @@
{ "vnode interlock", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
+ * dircache pool locks/vnode interlock
+ */
+ { "dircache lock 0", &lock_class_mtx_sleep },
+ { "dircache lock 1", &lock_class_mtx_sleep },
+ { "dircache lock 2", &lock_class_mtx_sleep },
+ { "dircache lock 3", &lock_class_mtx_sleep },
+ { "dircache lock 4", &lock_class_mtx_sleep },
+ { "dircache lock 5", &lock_class_mtx_sleep },
+ { "dircache lock 6", &lock_class_mtx_sleep },
+ { "dircache lock 7", &lock_class_mtx_sleep },
+ { "vnode interlock", &lock_class_mtx_sleep },
+ { NULL, NULL },
+ /*
* ZFS locking
*/
{ "dn->dn_mtx", &lock_class_sx },
==== //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#2 (text+ko) ====
@@ -49,13 +49,37 @@
#include <sys/dircache.h>
-#define DCDEBUG(format, args...) printf(format ,## args)
+#define DCDEBUG(format, args...) \
+ do { \
+ if (dircache_debug != 0) \
+ printf(format ,## args); \
+ } while (0)
+
+#define DIRCACHE_STAT(n, descr) \
+ SYSCTL_PROC(_vfs_dircache, OID_AUTO, n, \
+ CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, \
+ NULL, __CONCAT(dps_, n), dps_sysctlhandler, "LU", descr)
+
+#define DC_NAMEROUND 16 /* power of 2 */
+
+enum {
+ dps_interlock_same,
+ dps_interlock_direct,
+ dps_interlock_reverse,
+ dps_interlock_reverse_fast,
+ dps_max
+};
struct nspace;
+struct dircache_poolstat {
+ u_long dps_stats[dps_max];
+};
+
struct dircache_pool {
struct mtx dp_mtx;
u_long dp_gen;
+ struct dircache_poolstat dp_stat;
};
struct dircache_root {
@@ -63,30 +87,68 @@
struct dircache *dr_entry;
};
+SYSCTL_NODE(_vfs, OID_AUTO, dircache, CTLFLAG_RW, 0, "Dircache");
static MALLOC_DEFINE(M_DIRCACHE, "dircache buf", "dircache buffers");
static uma_zone_t dircache_zone;
static struct dircache_pool **pool;
static size_t pool_size;
static u_long pool_id;
+static char **pool_mtxname;
+static const int pool_mtxnamesz = 20;
+static int dircache_debug = 1;
+SYSCTL_UINT(_vfs_dircache, OID_AUTO, debug, CTLFLAG_RW, &dircache_debug, 0,
+ "Enable debug");
+
+static int dps_sysctlhandler(SYSCTL_HANDLER_ARGS);
+
+DIRCACHE_STAT(interlock_same,
+ "Same lock hits in interlock");
+DIRCACHE_STAT(interlock_direct,
+ "Direct lock order hits in interlock");
+DIRCACHE_STAT(interlock_reverse,
+ "Reverse lock order hits in interlock");
+DIRCACHE_STAT(interlock_reverse_fast,
+ "Reverse lock order without sleeping hits in interlock");
+
+static int
+ptr_cmp(const void *a, const void *b)
+{
+ return (((uintptr_t)(*(void * const *)a)) -
+ ((uintptr_t)(*(void * const *)b)));
+}
+
static void
dircache_sysinit(void *arg __unused)
{
int i;
- pool_size = 1;
+ pool_size = 4;
dircache_zone = uma_zcreate("dircache",
sizeof(struct dircache), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
pool = malloc(sizeof(void *) * pool_size,
M_DIRCACHE, M_WAITOK);
+ pool_mtxname = malloc(sizeof(void *) * pool_size,
+ M_DIRCACHE, M_WAITOK);
+ /*
+ * Keep struct dircache_pool size minimal. (and align at cache
+ * pipeline?)
+ * Use pool address for lock ordering.
+ */
for (i = 0; i < pool_size; i++) {
+ pool_mtxname[i] = malloc(pool_mtxnamesz,
+ M_DIRCACHE, M_WAITOK | M_ZERO);
pool[i] = malloc(sizeof(struct dircache_pool),
M_DIRCACHE, M_WAITOK | M_ZERO);
+ }
+ qsort(pool, pool_size, sizeof(void *), ptr_cmp);
+ for (i = 0; i < pool_size; i++) {
pool[i]->dp_gen = pool_id++;
- mtx_init(&pool[i]->dp_mtx, "dircache lock", NULL, MTX_DEF);
+ snprintf(pool_mtxname[i], pool_mtxnamesz, "dircache lock %d", i);
+ mtx_init(&pool[i]->dp_mtx, pool_mtxname[i], NULL, MTX_DEF);
}
}
SYSINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysinit, NULL);
@@ -99,13 +161,62 @@
for (i = 0; i < pool_size; i++) {
mtx_destroy(&pool[i]->dp_mtx);
free(pool[i], M_DIRCACHE);
+ free(pool_mtxname[i], M_DIRCACHE);
}
free(pool, M_DIRCACHE);
+ free(pool_mtxname, M_DIRCACHE);
pool = NULL;
uma_zdestroy(dircache_zone);
}
SYSUNINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysuninit, NULL);
+static void
+pool_getstats(struct dircache_poolstat *ps)
+{
+ struct dircache_poolstat *stat;
+ int i, ind;
+
+ for (i = 0; i < pool_size; i++) {
+ mtx_lock(&pool[i]->dp_mtx);
+ stat = &pool[i]->dp_stat;
+ for (ind = 0; ind < dps_max; ind++)
+ ps->dps_stats[ind] += stat->dps_stats[ind];
+ mtx_unlock(&pool[i]->dp_mtx);
+ }
+}
+
+static int
+dps_sysctlhandler(SYSCTL_HANDLER_ARGS)
+{
+ struct dircache_poolstat st = {};
+ u_long res;
+ int error;
+
+ pool_getstats(&st);
+ res = st.dps_stats[arg2];
+ error = SYSCTL_OUT(req, &res, sizeof(res));
+
+ return (error);
+}
+
+static __inline void
+dp_incstat(int ind, struct dircache_pool *dp, u_long val)
+{
+ dp->dp_stat.dps_stats[ind] += val;
+}
+
+static __inline int
+dc_cmpname(struct dircache *dc, char *name, size_t namelen)
+{
+ int r;
+
+ r = dc->dc_namelen - namelen;
+ if (r != 0)
+ return (r);
+ r = bcmp(dc->dc_name, name, namelen);
+ return (r);
+}
+
static __inline int
dc_cmp(struct dircache *a, struct dircache *b)
{
@@ -123,57 +234,61 @@
RB_GENERATE_STATIC(dircache_tree, dircache, dc_listentry, dc_cmp);
+#define DC_MTX(dc) (&(dc)->dc_pool->dp_mtx)
+
+#define dc_lock(dc) mtx_lock(DC_MTX(dc))
+
+#define dc_trylock(dc) mtx_trylock(DC_MTX(dc))
+
+#define dc_unlock(dc) mtx_unlock(DC_MTX(dc))
+
+#define dc_assertlock(dc, w) mtx_assert(DC_MTX(dc), (w))
-static __inline void
-dc_lock(struct dircache *dc)
+static void
+dc_relock(struct dircache *from, struct dircache *to)
{
- mtx_lock(&dc->dc_pool->dp_mtx);
-}
+ dc_assertlock(from, MA_OWNED);
-static __inline int
-dc_trylock(struct dircache *dc)
-{
- return (mtx_trylock(&dc->dc_pool->dp_mtx));
-}
+ if (from->dc_pool == to->dc_pool)
+ return;
-static __inline void
-dc_unlock(struct dircache *dc)
-{
- mtx_unlock(&dc->dc_pool->dp_mtx);
-}
+ dc_assertlock(to, MA_NOTOWNED);
-static __inline void
-dc_assertlock(struct dircache *dc, int what)
-{
- mtx_assert(&dc->dc_pool->dp_mtx, what);
+ dc_unlock(from);
+ dc_lock(to);
}
static void
-dc_interlock(struct dircache *a, struct dircache *b)
+dc_interlock(struct dircache *from, struct dircache *to)
{
- dc_assertlock(a, MA_OWNED);
+ dc_assertlock(from, MA_OWNED);
- if (a->dc_pool == b->dc_pool)
+ if (from->dc_pool == to->dc_pool) {
+ dp_incstat(dps_interlock_same, to->dc_pool, 1);
return;
+ }
- dc_assertlock(b, MA_NOTOWNED);
- if ((uintptr_t)a->dc_pool < (uintptr_t)b->dc_pool) {
- dc_lock(b);
- dc_unlock(a);
+ dc_assertlock(to, MA_NOTOWNED);
+ if ((uintptr_t)from->dc_pool < (uintptr_t)to->dc_pool) {
+ dc_lock(to);
+ dc_unlock(from);
+ dp_incstat(dps_interlock_direct, to->dc_pool, 1);
return;
}
critical_enter();
- if (dc_trylock(b) != 0) {
- dc_unlock(a);
+ if (dc_trylock(to) != 0) {
+ dc_unlock(from);
critical_exit();
+ dp_incstat(dps_interlock_reverse_fast, to->dc_pool, 1);
return;
}
critical_exit();
/* !!!! FIXME !!!! */
- dc_unlock(a);
- dc_lock(b);
+ dc_unlock(from);
+ dc_lock(to);
+ dp_incstat(dps_interlock_reverse, to->dc_pool, 1);
}
static __inline void
@@ -184,16 +299,41 @@
dc->dc_namehash = hash32_buf(name, namelen, HASHINIT * namelen);
}
+static __inline size_t
+dc_namebuflen(size_t namelen)
+{
+ return (roundup2(namelen + 1, DC_NAMEROUND));
+}
+
+static __inline int
+dc_namebuffits(struct dircache *dc, size_t namelen)
+{
+ return (dc_namebuflen(dc->dc_namelen) < namelen + 1);
+}
+
+static __inline char *
+dc_allocnamebuf(size_t namelen)
+{
+ char * buf;
+
+ buf = malloc(dc_namebuflen(namelen), M_DIRCACHE, M_WAITOK);
+ return (buf);
+}
+
static __inline void
-dc_setname(struct dircache *dc, char *name, size_t namelen)
+dc_setname(struct dircache *dc, char *name, size_t namelen, char *namebuf)
{
MPASS(name != dc->dc_name);
- if (dc->dc_name == NULL || dc->dc_namelen < namelen) {
+ if (dc->dc_name == NULL || dc_namebuffits(dc, namelen) == 0) {
if (dc->dc_name != NULL)
free(dc->dc_name, M_DIRCACHE);
- dc->dc_name = malloc(namelen + 1, M_DIRCACHE, M_WAITOK);
- }
+ if (namebuf == NULL)
+ dc->dc_name = dc_allocnamebuf(namelen);
+ else
+ dc->dc_name = namebuf;
+ } else
+ MPASS(namebuf == NULL);
memcpy(dc->dc_name, name, namelen);
dc->dc_name[namelen] = '\0';
dc_initname(dc, dc->dc_name, namelen);
@@ -210,7 +350,6 @@
dc->dc_gen = *genp;
}
-
static struct dircache *
dc_alloc(struct dircache *pdc, enum dircache_type type,
char *name, size_t namelen)
@@ -220,22 +359,23 @@
dc = uma_zalloc(dircache_zone, M_WAITOK | M_ZERO);
DCDEBUG("alloc: %p %s\n", dc, name);
+
+ dc->dc_type = type;
+ dc->dc_flags = DC_CH_PARTIAL;
+ dc->dc_parent = pdc;
+ refcount_init(&dc->dc_refcnt, 1);
cv_init(&dc->dc_condvar, "dircache cv");
+
if (name != NULL && namelen != 0) {
- dc_setname(dc, name, namelen);
- dc->dc_parent = pdc;
+ dc_setname(dc, name, namelen, NULL);
/* cheaper way to get pseudo-random value */
poolind = dc->dc_namehash;
} else {
poolind = arc4random();
}
-
poolind %= pool_size;
dc->dc_pool = pool[poolind];
- dc->dc_flags = DC_CH_PARTIAL;
- refcount_init(&dc->dc_refcnt, 1);
-
return (dc);
}
@@ -290,18 +430,6 @@
}
static void
-dc_refvnode_locked(struct dircache *dc, struct vnode *vp)
-{
- dc_ref(dc);
- MPASS(dc->dc_vnode == NULL);
- dc->dc_vnode = vp;
- TAILQ_INSERT_HEAD(&vp->v_dircache, dc, dc_vnodelist);
- DCDEBUG("refvnode locked: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name,
- vp, dc->dc_refcnt);
- MPASS(TAILQ_FIRST(&vp->v_dircache) == dc);
-}
-
-static void
dc_refvnode(struct dircache *dc, struct vnode *vp)
{
if (dc->dc_type != DT_ROOT)
@@ -309,57 +437,102 @@
DCDEBUG("refvnode: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name,
vp, dc->dc_refcnt);
+ MPASS(vp->v_type != VNON && vp->v_type != VBAD);
+ MPASS(dc->dc_vnode == NULL);
dc_ref(dc);
- MPASS(dc->dc_vnode == NULL);
dc->dc_vnode = vp;
- dc_unlock(dc);
VI_LOCK(vp);
+ if (vp->v_type == VDIR && !TAILQ_EMPTY(&vp->v_dircache))
+ panic("dircache: multiple directory vnode references %p", vp);
TAILQ_INSERT_HEAD(&vp->v_dircache, dc, dc_vnodelist);
VI_UNLOCK(vp);
}
static void
-dc_relevnode(struct dircache *dc, struct vnode *vp)
+dc_relevnode(struct dircache *dc)
{
MPASS(dc->dc_vnode != NULL);
+ dc_assertlock(dc, MA_OWNED);
DCDEBUG("relevnode: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name,
- vp, dc->dc_refcnt);
+ dc->dc_vnode, dc->dc_refcnt);
- TAILQ_REMOVE(&vp->v_dircache, dc, dc_vnodelist);
+ VI_LOCK(dc->dc_vnode);
+ TAILQ_REMOVE(&dc->dc_vnode->v_dircache, dc, dc_vnodelist);
+ VI_UNLOCK(dc->dc_vnode);
dc->dc_vnode = NULL;
dc_unlock(dc);
dc_rele(dc);
}
static struct dircache *
-dc_getentry(struct vnode *vp, struct dircache *parent_hint,
- struct nspace *nspace_hint, int flags)
+dc_getentry(struct vnode *vp, struct componentname *cnp, struct vnode *dvp)
{
struct dircache *dc;
+restart:
VI_LOCK(vp);
dc = TAILQ_FIRST(&vp->v_dircache);
if (dc == NULL) {
+ VI_UNLOCK(vp);
if ((vp->v_vflag & VV_ROOT) != 0) {
dc = vp->v_mount->mnt_dircache;
DCDEBUG("getentry: root %p vp=%p\n", dc, vp);
MPASS(dc != NULL);
dc_lock(dc);
- dc_refvnode_locked(dc, vp);
- goto out;
+ dc_refvnode(dc, vp);
} else {
- VI_UNLOCK(vp);
+#if 0
DCDEBUG("getentry: not found vp=%p\n", vp);
+#else
+ panic("dircache: entry not found for vnode %p\n", vp);
+#endif
return (NULL);
}
+ } else {
+ if (TAILQ_NEXT(dc, dc_vnodelist) != NULL) {
+ MPASS(cnp != NULL && dvp != NULL);
+ MPASS(vp->v_type != VDIR);
+ MPASS(!(cnp->cn_nameptr[0] == '.' &&
+ (cnp->cn_namelen == 1 || (cnp->cn_namelen == 2 &&
+ cnp->cn_nameptr[1] == '.'))));
+
+ for(; dc != NULL; dc = TAILQ_NEXT(dc, dc_vnodelist)) {
+ VI_UNLOCK(vp);
+ dc_lock(dc);
+ if (dc->dc_vnode != vp) {
+ dc_unlock(dc);
+ DCDEBUG("getenrty: restart; multiple entries; vp=%p\n",
+ vp);
+ goto restart;
+ }
+ /* FIXME: dc_parent locking */
+ if (dc_cmpname(dc, cnp->cn_nameptr,
+ cnp->cn_namelen) == 0 &&
+ dvp == dc->dc_parent->dc_vnode)
+ break;
+ dc_unlock(dc);
+ VI_LOCK(vp);
+ }
+ if (dc == NULL) {
+ VI_UNLOCK(vp);
+#if 0
+ return (NULL);
+#else
+ panic("dircache: entry not found for vnode %p (multiple)\n", vp);
+#endif
+ }
+ } else {
+ VI_UNLOCK(vp);
+ dc_lock(dc);
+ }
}
- MPASS(TAILQ_NEXT(dc, dc_vnodelist) == NULL);
- dc_lock(dc);
-out:
- if ((flags & DC_OP_LOCKVP) == 0)
- VI_UNLOCK(vp);
-
+ dc_assertlock(dc, MA_OWNED);
+ if (dc->dc_vnode != vp) {
+ dc_unlock(dc);
+ DCDEBUG("getenrty: restart; vp=%p\n", vp);
+ goto restart;
+ }
return (dc);
}
@@ -369,7 +542,7 @@
struct dircache key;
struct dircache *pdc, *dc;
- pdc = dc_getentry(dvp, NULL, NULL, 0);
+ pdc = dc_getentry(dvp, NULL, NULL);
if (pdc == NULL)
return (NULL);
dc_assertlock(pdc, MA_OWNED);
@@ -381,15 +554,107 @@
return (NULL);
}
+ if ((flags & DC_OP_NOWAIT) == 0)
+ dc_wait(pdc);
+
dc_interlock(pdc, dc);
dc_assertlock(dc, MA_OWNED);
+ MPASS(dc->dc_parent == pdc);
+
+ return (dc);
+}
- if ((flags & DC_OP_NOWAIT) == 0)
- dc_wait(dc);
+static struct dircache *
+dc_update(struct dircache_cursor *curs, struct vnode *vp,
+ enum dircache_type type, char *name, size_t namelen,
+ ino_t inode, off_t offset, void *fspriv)
+{
+ struct dircache *dc, *pdc, *col;
+
+ pdc = curs->dcr_parent;
+ MPASS((pdc->dc_flags & DC_CH_UPDATING) != 0);
+
+ DCDEBUG("update: parent=%p name=%s\n", pdc, name);
+
+ dc = dc_alloc(pdc, type, name, namelen);
+ if (type == DT_WEAK)
+ curs->dcr_nflags |= DC_CH_HASWEAK;
+ dc->dc_fspriv = fspriv;
+ col = RB_INSERT(dircache_tree, &pdc->dc_children, dc);
+ if (col != NULL) {
+ if (type == col->dc_type) {
+ DCDEBUG("update: warn: same entry added: %s\n", dc->dc_name);
+ MPASS(col->dc_inode == inode && col->dc_offset == offset);
+ dc->dc_fspriv = NULL;
+ dc->dc_parent = NULL;
+ dc_rele(dc);
+ return (NULL);
+ } else if (col->dc_type == DT_NEGATIVE) {
+ DCDEBUG("update: replace negative entry: %p %s\n", dc, dc->dc_name);
+ dc_lock(col);
+ col->dc_type = type;
+ MPASS((col->dc_flags & DC_CH_COMPLETE) == 0);
+ col->dc_flags |= DC_CH_PARTIAL;
+ col->dc_inode = inode;
+ col->dc_offset = offset;
+ MPASS(col->dc_fspriv == NULL);
+ col->dc_fspriv = fspriv;
+ dc->dc_fspriv = NULL;
+ dc_unlock(col);
+ dc->dc_parent = NULL;
+ dc_rele(dc);
+ dc = col;
+ } else
+ panic("dircache: update: ivalid entry: %d %s\n",
+ dc->dc_type, dc->dc_name);
+ } else
+ dc_ref(pdc);
+ if (vp != NULL) {
+ dc_lock(dc);
+ dc_refvnode(dc, vp);
+ dc_unlock(dc);
+ }
return (dc);
}
+static void
+dc_removeentry(struct dircache *dc)
+{
+ struct dircache *parent;
+ MPASS(dc->dc_parent != NULL);
+
+ dc->dc_fspriv = NULL;
+ dc->dc_type = DT_INVAL;
+ parent = dc->dc_parent;
+ dc->dc_parent = NULL;
+ RB_REMOVE(dircache_tree, &parent->dc_children, dc);
+ if (dc->dc_vnode != NULL)
+ dc_relevnode(dc);
+ else
+ dc_unlock(dc);
+ dc_rele(parent);
+ dc_rele(dc);
+}
+
+static void
+dc_marknegative(struct dircache *dc)
+{
+ DCDEBUG("mark negative: %p %s; vp=%p\n", dc, dc->dc_name, dc->dc_vnode);
+ dc->dc_inode = 0;
+ dc->dc_offset = 0;
+ dc->dc_fspriv = NULL;
+ dc->dc_type = DT_NEGATIVE;
+ dc->dc_flags &= ~DC_CH_COMPLETE;
+ dc->dc_flags |= DC_CH_PARTIAL;
+ dc_updategen(dc);
+ if (dc->dc_vnode != NULL)
+ dc_relevnode(dc);
+ else
+ dc_unlock(dc);
+ dc_assertlock(dc, MA_NOTOWNED);
+}
+
void
dircache_init(struct mount *mp, ino_t inode)
{
@@ -416,15 +681,22 @@
{
struct dircache *dc, *ch, *tmp;
+restart:
VI_LOCK(vp);
TAILQ_FOREACH(dc, &vp->v_dircache, dc_vnodelist) {
DCDEBUG("purge negative: %p %s; vp=%p\n", dc, dc->dc_name, vp);
+ VI_UNLOCK(vp);
dc_lock(dc);
+ if (dc->dc_vnode != vp) {
+ dc_unlock(dc);
+ goto restart;
+ }
RB_FOREACH_SAFE(ch, dircache_tree, &dc->dc_children, tmp) {
- if (dc->dc_type == DT_NEGATIVE)
+ if (ch->dc_type == DT_NEGATIVE)
RB_REMOVE(dircache_tree, &dc->dc_children,
ch);
}
+ VI_LOCK(vp);
dc_unlock(dc);
}
VI_UNLOCK(vp);
@@ -434,29 +706,7 @@
dircache_update(struct dircache_cursor *curs, enum dircache_type type,
char *name, size_t namelen, ino_t inode, off_t offset)
{
- struct dircache *dc, *pdc, *col;
-
- pdc = curs->dcr_parent;
- MPASS((pdc->dc_flags & DC_CH_UPDATING) != 0);
-
- DCDEBUG("update: parent=%p name=%s\n", pdc, name);
-
- dc = dc_alloc(pdc, type, name, namelen);
-
- if (type == DT_WEAK)
- curs->dcr_nflags |= DC_CH_HASWEAK;
- col = RB_INSERT(dircache_tree, &pdc->dc_children, dc);
- if (col == NULL)
- dc_ref(pdc);
- else {
- if (type == col->dc_type) {
- DCDEBUG("update: warn: same entry added: %s\n", dc->dc_name);
- dc_free(dc);
- return;
- }
- panic("dircache: unexpected entry during update: %d %s\n",
- dc->dc_type, dc->dc_name);
- }
+ dc_update(curs, NULL, type, name, namelen, inode, offset, NULL);
}
int
@@ -468,7 +718,7 @@
curs->dcr_parent = NULL;
- dc = dc_getentry(dvp, NULL, NULL, 0);
+ dc = dc_getentry(dvp, NULL, NULL);
if (dc == NULL) {
DCDEBUG("beginupdate: not found dvp=%p; path=%s\n",
dvp, cnp->cn_nameptr);
@@ -542,32 +792,100 @@
dircache_endupdate(curs, DC_CH_PARTIAL);
}
+static int
+dircache_lookupdot(struct vnode *dvp, struct vnode **vpp,
+ struct componentname *cnp)
+{
+ int ltype;
+
+ MPASS(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.');
+
+ DCDEBUG("lookup dot: dvp=%p\n", dvp);
+
+ *vpp = dvp;
+ vref(*vpp);
+ /*
+ * When we lookup "." we still can be asked to lock it
+ * differently.
+ */
+ ltype = cnp->cn_lkflags & LK_TYPE_MASK;
+ if (ltype != VOP_ISLOCKED(*vpp)) {
+ if (ltype == LK_EXCLUSIVE) {
+ vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
+ if ((*vpp)->v_iflag & VI_DOOMED) {
+ /* forced unmount */
+ vrele(*vpp);
+ *vpp = NULL;
+ return (ENOENT);
+ }
+ } else
+ vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
+ }
+ return (-1);
+}
+
int
dircache_lookup(struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp)
{
- struct dircache *dc;
- int error;
+ struct dircache *pdc, *dc;
+ int error, ltype;
- dc = dc_find(dvp, cnp, 0);
+ if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1)
+ return (dircache_lookupdot(dvp, vpp, cnp));
+ else if (cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.' &&
+ cnp->cn_namelen == 2) {
+ MPASS((cnp->cn_flags & ISDOTDOT) != 0);
+ pdc = dc_getentry(dvp, NULL, NULL);
+ if (pdc != NULL) {
+ DCDEBUG("lookup dotdot: dvp=%p\n", dvp);
+ dc = pdc->dc_parent;
+ dc_interlock(pdc, dc);
+ } else
+ dc = NULL;
+ } else
+ dc = dc_find(dvp, cnp, 0);
if (dc == NULL) {
- DCDEBUG("lookup: not found: %s; dvp=%p\n", cnp->cn_nameptr, dvp);
+ DCDEBUG("lookup: not found: %s; dvp=%p; op=%ld\n",
+ cnp->cn_nameptr, dvp, cnp->cn_nameiop);
return (0);
}
error = 0;
if (dc->dc_type == DT_NEGATIVE)
- error = ENOENT;
+ switch (cnp->cn_nameiop) {
+ case CREATE:
+ case RENAME:
+ error = 0;
+ break;
+ default:
+ error = ENOENT;
+ }
else if (dc->dc_vnode != NULL) {
*vpp = dc->dc_vnode;
error = -1;
}
dc_unlock(dc);
- DCDEBUG("lookup: error=%d: %p %s; dvp=%p\n", error, dc, dc->dc_name, dvp);
+ DCDEBUG("lookup: error=%d: %p %s; dvp=%p; op=%ld\n", error, dc,
+ dc->dc_name, dvp, cnp->cn_nameiop);
if (error == -1) {
+ ltype = 0;
+ if ((cnp->cn_flags & ISDOTDOT) != 0) {
+ ltype = VOP_ISLOCKED(dvp);
+ VOP_UNLOCK(dvp, 0);
+ }
if (vget(*vpp, cnp->cn_lkflags, cnp->cn_thread) != 0) {
*vpp = NULL;
error = 0;
}
+ if (cnp->cn_flags & ISDOTDOT) {
+ vn_lock(dvp, ltype | LK_RETRY);
+ if (dvp->v_iflag & VI_DOOMED) {
+ if (error == 0)
+ vput(*vpp);
+ *vpp = NULL;
+ return (ENOENT);
+ }
+ }
}
return (error);
}
@@ -583,23 +901,12 @@
DCDEBUG("add: inode=%d %s; vp=%p\n", inode, cnp->cn_nameptr, vp);
if (dircache_beginupdate(&curs, dvp, cnp, 0) != 0)
return (ENOENT);
- dircache_update(&curs, type, cnp->cn_nameptr, cnp->cn_namelen,
- inode, offset);
+ dc_update(&curs, vp, type, cnp->cn_nameptr, cnp->cn_namelen,
+ inode, offset, NULL);
dircache_endupdate(&curs, 0);
return (0);
}
-static void
-dc_remove(struct dircache *dc, struct vnode *vp)
-{
- dc->dc_inode = 0;
- dc->dc_offset = 0;
- dc->dc_fspriv = NULL;
- dc->dc_type = DT_NEGATIVE;
- dc_updategen(dc);
- dc_relevnode(dc, vp);
-}
-
int
dircache_remove(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
@@ -607,16 +914,18 @@
struct dircache *dc;
DCDEBUG("remove: %s; vp=%p\n", cnp->cn_nameptr, vp);
- if (dircache_beginupdate(&curs, dvp, cnp, 0) == 0)
+ if (dircache_beginupdate(&curs, dvp, cnp, 0) != 0)
return (ENOENT);
- dc = dc_getentry(vp, curs.dcr_parent, NULL, DC_OP_LOCKVP);
+ dc = dc_getentry(vp, cnp, dvp);
if (dc == NULL) {
+ DCDEBUG("remove: vp not found: %s vp=%p\n", cnp->cn_nameptr,
+ vp);
MPASS(dc_find(dvp, cnp, DC_OP_NOWAIT) == 0);
dircache_endupdate(&curs, 0);
return (ENOENT);
}
- dc_remove(dc, vp);
+ dc_marknegative(dc);
dircache_endupdate(&curs, 0);
return (0);
}
@@ -628,6 +937,7 @@
{
struct dircache_cursor fcr, tcr;
struct dircache *fdc, *tdc, *col;
+ char *namebuf;
int error;
MPASS(fvp != tvp);
@@ -638,48 +948,83 @@
if (fdvp == tdvp) {
if (tcr.dcr_parent == NULL)
return (ENOENT);
+ fcr.dcr_parent = tcr.dcr_parent;
} else {
dircache_beginupdate(&fcr, fdvp, fcnp, 0);
+#if 0
if (tcr.dcr_parent == NULL && fcr.dcr_parent == NULL) {
- MPASS(dc_getentry(fvp, NULL, NULL, 0) == NULL);
+ MPASS(dc_getentry(fvp, fcnp) == NULL);
MPASS(tvp == NULL ||
- dc_getentry(tvp, NULL, NULL, 0) == NULL);
+ dc_getentry(tvp, NULL) == NULL);
return (ENOENT);
} else if (tcr.dcr_parent == NULL) {
- error = dircache_remove(fdvp, fvp, fcnp);
+ fdc = dc_getentry(fvp, fcnp);
+ if (fdc != NULL) {
+ dc_marknegative(fdc, fvp);
+ } else {
+ MPASS(dc_find(fdvp, fcnp, DC_OP_NOWAIT) == 0);
+ error = ENOENT;
+ }
dircache_endupdate(&fcr, 0);
return (error);
} else if (fcr.dcr_parent == NULL) {
dircache_partialupdate(&tcr);
return (0);
}
+#else
+ MPASS(tcr.dcr_parent != NULL && fcr.dcr_parent != NULL);
+#endif
}
if (tvp != NULL) {
- tdc = dc_getentry(tvp, tcr.dcr_parent, NULL, DC_OP_LOCKVP);
+ tdc = dc_getentry(tvp, tcnp, tdvp);
if (tdc != NULL) {
- MPASS(fcr.dcr_parent == tdc->dc_parent);
- dc_remove(tdc, tvp);
+ MPASS(tcr.dcr_parent == tdc->dc_parent);
+ dc_removeentry(tdc);
} else
MPASS(dc_find(tdvp, tcnp, DC_OP_NOWAIT) == 0);
}
- fdc = dc_getentry(fvp, fcr.dcr_parent, NULL, DC_OP_LOCKVP);
+ fdc = dc_getentry(fvp, fcnp, tdvp);
if (fdc == NULL) {
MPASS(dc_find(fdvp, fcnp, DC_OP_NOWAIT) == 0);
error = ENOENT;
goto out;
}
+ DCDEBUG("rename: remove from tree: %p %s; parent=%p\n", fdc,
+ fdc->dc_name, fcr.dcr_parent);
+ RB_REMOVE(dircache_tree, &fcr.dcr_parent->dc_children, fdc);
+ DCDEBUG("rename: rename: %p %s\n", fdc, fdc->dc_name);
+ namebuf = NULL;
+ if (dc_namebuffits(fdc, tcnp->cn_namelen) == 0) {
+ MPASS(fdc->dc_namelen > 0 && fdc->dc_name != NULL);
+ dc_unlock(fdc);
+ namebuf = dc_allocnamebuf(tcnp->cn_namelen);
+ dc_lock(fdc);
+ }
+ dc_setname(fdc, tcnp->cn_nameptr, tcnp->cn_namelen, namebuf);
dc_updategen(fdc);
- RB_REMOVE(dircache_tree, &fcr.dcr_parent->dc_children, fdc);
- dc_setname(fdc, tcnp->cn_nameptr, tcnp->cn_namelen);
+ DCDEBUG("rename: insert to tree: %p %s; parent=%p\n", fdc, fdc->dc_name,
+ tcr.dcr_parent);
+reinsert:
col = RB_INSERT(dircache_tree, &tcr.dcr_parent->dc_children, fdc);
- MPASS(col == NULL);
- if (fcr.dcr_parent != tcr.dcr_parent) {
+ if (col != NULL) {
+ DCDEBUG("rename: insert collision: %p %s; type=%d\n", col,
+ col->dc_name, col->dc_type);
+ if (col->dc_type != DT_NEGATIVE)
+ panic("dircache: rename: invalid entry: %d %s\n",
+ col->dc_type, col->dc_name);
+ dc_relock(fdc, col);
+ dc_removeentry(col);
+ dc_lock(fdc);
+ goto reinsert;
+ }
+ if (fdvp != tdvp) {
dc_ref(tcr.dcr_parent);
+ fdc->dc_parent = tcr.dcr_parent;
+ dc_unlock(fdc);
dc_rele(fcr.dcr_parent);
- fdc->dc_parent = tcr.dcr_parent;
- }
- dc_unlock(fdc);
+ } else
+ dc_unlock(fdc);
out:
dircache_endupdate(&tcr, 0);
@@ -694,10 +1039,14 @@
{
struct dircache_cursor curs;
+ if (cnp->cn_nameptr[0] == '.' && (cnp->cn_namelen == 1 ||
+ (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.')))
+ panic("dircache: set negative for '.' or '..'");
+
if (dircache_beginupdate(&curs, dvp, cnp, 0) != 0)
return (ENOENT);
- dircache_update(&curs, DT_NEGATIVE, cnp->cn_nameptr, cnp->cn_namelen,
- 0, 0);
+ dc_update(&curs, NULL, DT_NEGATIVE, cnp->cn_nameptr, cnp->cn_namelen,
+ 0, 0, NULL);
dircache_endupdate(&curs, 0);
return (0);
}
@@ -708,19 +1057,29 @@
{
struct dircache *dc;
- dc = dc_getentry(vp, NULL, NULL, 0);
+ dc = dc_getentry(vp, cnp, dvp);
if (dc != NULL) {
- MPASS(dc->dc_namelen == cnp->cn_namelen &&
- bcmp(dc->dc_name, cnp->cn_nameptr, dc->dc_namelen) == 0);
+ DCDEBUG("setvnode found entry: %p %s; type=%d; vp=%p; cnp=%d %s\n",
+ dc, dc ? dc->dc_name : "??", dc->dc_type, vp,
+ (int)cnp->cn_namelen, cnp->cn_nameptr);
+ MPASS(dc->dc_type == DT_ROOT ||
+ (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) ||
+ ((dc->dc_namelen == cnp->cn_namelen) &&
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list