PERFORCE change 179615 for review

Gleb Kurtsou gk at FreeBSD.org
Mon Jun 14 19:25:55 UTC 2010


http://p4web.freebsd.org/@@179615?ac=10

Change 179615 by gk at gk_h1 on 2010/06/14 19:25:33

	Fix bugs, add missing bits to make cache operations work for tmpfs.

Affected files ...

.. //depot/projects/soc2010/gk_namecache/sys/kern/subr_witness.c#2 edit
.. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#2 edit
.. //depot/projects/soc2010/gk_namecache/sys/kern/vfs_subr.c#3 edit
.. //depot/projects/soc2010/gk_namecache/sys/sys/dircache.h#2 edit

Differences ...

==== //depot/projects/soc2010/gk_namecache/sys/kern/subr_witness.c#2 (text+ko) ====

@@ -614,6 +614,19 @@
 	{ "vnode interlock", &lock_class_mtx_sleep },
 	{ NULL, NULL },
 	/*
+	 * dircache pool locks/vnode interlock
+	 */
+	{ "dircache lock 0", &lock_class_mtx_sleep },
+	{ "dircache lock 1", &lock_class_mtx_sleep },
+	{ "dircache lock 2", &lock_class_mtx_sleep },
+	{ "dircache lock 3", &lock_class_mtx_sleep },
+	{ "dircache lock 4", &lock_class_mtx_sleep },
+	{ "dircache lock 5", &lock_class_mtx_sleep },
+	{ "dircache lock 6", &lock_class_mtx_sleep },
+	{ "dircache lock 7", &lock_class_mtx_sleep },
+	{ "vnode interlock", &lock_class_mtx_sleep },
+	{ NULL, NULL },
+	/*
 	 * ZFS locking
 	 */
 	{ "dn->dn_mtx", &lock_class_sx },

==== //depot/projects/soc2010/gk_namecache/sys/kern/vfs_dircache.c#2 (text+ko) ====

@@ -49,13 +49,37 @@
 
 #include <sys/dircache.h>
 
-#define DCDEBUG(format, args...)	printf(format ,## args)
+#define DCDEBUG(format, args...)					\
+	do {								\
+		if (dircache_debug != 0)				\
+			printf(format ,## args);			\
+	} while (0)
+
+#define DIRCACHE_STAT(n, descr)						\
+	SYSCTL_PROC(_vfs_dircache, OID_AUTO, n,				\
+	CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE,			\
+	NULL, __CONCAT(dps_, n), dps_sysctlhandler, "LU", descr)
+
+#define DC_NAMEROUND		16	/* power of 2 */
+
+enum {
+	dps_interlock_same,
+	dps_interlock_direct,
+	dps_interlock_reverse,
+	dps_interlock_reverse_fast,
+	dps_max
+};
 
 struct nspace;
 
+struct dircache_poolstat {
+	u_long dps_stats[dps_max];
+};
+
 struct dircache_pool {
 	struct mtx dp_mtx;
 	u_long dp_gen;
+	struct dircache_poolstat dp_stat;
 };
 
 struct dircache_root {
@@ -63,30 +87,68 @@
 	struct dircache *dr_entry;
 };
 
+SYSCTL_NODE(_vfs, OID_AUTO, dircache, CTLFLAG_RW, 0, "Dircache");
 static MALLOC_DEFINE(M_DIRCACHE, "dircache buf", "dircache buffers");
 static uma_zone_t dircache_zone;
 
 static struct dircache_pool **pool;
 static size_t pool_size;
 static u_long pool_id;
+static char **pool_mtxname;
+static const int pool_mtxnamesz = 20;
 
+static int dircache_debug = 1;
+SYSCTL_UINT(_vfs_dircache, OID_AUTO, debug, CTLFLAG_RW, &dircache_debug, 0,
+    "Enable debug");
+
+static int dps_sysctlhandler(SYSCTL_HANDLER_ARGS);
+
+DIRCACHE_STAT(interlock_same,
+    "Same lock hits in interlock");
+DIRCACHE_STAT(interlock_direct,
+    "Direct lock order hits in interlock");
+DIRCACHE_STAT(interlock_reverse,
+    "Reverse lock order hits in interlock");
+DIRCACHE_STAT(interlock_reverse_fast,
+    "Reverse lock order without sleeping hits in interlock");
+
+static int
+ptr_cmp(const void *a, const void *b)
+{
+	return (((uintptr_t)(*(void * const *)a)) -
+	    ((uintptr_t)(*(void * const *)b)));
+}
+
 static void
 dircache_sysinit(void *arg __unused)
 {
 	int i;
 
-	pool_size = 1;
+	pool_size = 4;
 
 	dircache_zone = uma_zcreate("dircache",
 	    sizeof(struct dircache), NULL, NULL, NULL, NULL,
             UMA_ALIGN_PTR, 0);
 	pool = malloc(sizeof(void *) * pool_size,
 	    M_DIRCACHE, M_WAITOK);
+	pool_mtxname = malloc(sizeof(void *) * pool_size,
+	    M_DIRCACHE, M_WAITOK);
+	/*
+	 * Keep struct dircache_pool size minimal. (and align at cache
+	 * pipeline?)
+	 * Use pool address for lock ordering.
+	 */
 	for (i = 0; i < pool_size; i++) {
+		pool_mtxname[i] = malloc(pool_mtxnamesz,
+		    M_DIRCACHE, M_WAITOK | M_ZERO);
 		pool[i] = malloc(sizeof(struct dircache_pool),
 		    M_DIRCACHE, M_WAITOK | M_ZERO);
+	}
+	qsort(pool, pool_size, sizeof(void *), ptr_cmp);
+	for (i = 0; i < pool_size; i++) {
 		pool[i]->dp_gen = pool_id++;
-		mtx_init(&pool[i]->dp_mtx, "dircache lock", NULL, MTX_DEF);
+		snprintf(pool_mtxname[i], pool_mtxnamesz, "dircache lock %d", i);
+		mtx_init(&pool[i]->dp_mtx, pool_mtxname[i], NULL, MTX_DEF);
 	}
 }
 SYSINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysinit, NULL);
@@ -99,13 +161,62 @@
 	for (i = 0; i < pool_size; i++) {
 		mtx_destroy(&pool[i]->dp_mtx);
 		free(pool[i], M_DIRCACHE);
+		free(pool_mtxname[i], M_DIRCACHE);
 	}
 	free(pool, M_DIRCACHE);
+	free(pool_mtxname, M_DIRCACHE);
 	pool = NULL;
 	uma_zdestroy(dircache_zone);
 }
 SYSUNINIT(dircache, SI_SUB_VFS, SI_ORDER_SECOND, dircache_sysuninit, NULL);
 
+static void
+pool_getstats(struct dircache_poolstat *ps)
+{
+	struct dircache_poolstat *stat;
+	int i, ind;
+
+	for (i = 0; i < pool_size; i++) {
+		mtx_lock(&pool[i]->dp_mtx);
+		stat = &pool[i]->dp_stat;
+		for (ind = 0; ind < dps_max; ind++)
+			ps->dps_stats[ind] += stat->dps_stats[ind];
+		mtx_unlock(&pool[i]->dp_mtx);
+	}
+}
+
+static int
+dps_sysctlhandler(SYSCTL_HANDLER_ARGS)
+{
+	struct dircache_poolstat st = {};
+	u_long res;
+	int error;
+
+	pool_getstats(&st);
+	res = st.dps_stats[arg2];
+	error = SYSCTL_OUT(req, &res, sizeof(res));
+
+	return (error);
+}
+
+static __inline void
+dp_incstat(int ind, struct dircache_pool *dp, u_long val)
+{
+	dp->dp_stat.dps_stats[ind] += val;
+}
+
+static __inline int
+dc_cmpname(struct dircache *dc, char *name, size_t namelen)
+{
+	int r;
+
+	r = dc->dc_namelen - namelen;
+	if (r != 0)
+		return (r);
+	r = bcmp(dc->dc_name, name, namelen);
+	return (r);
+}
+
 static __inline int
 dc_cmp(struct dircache *a, struct dircache *b)
 {
@@ -123,57 +234,61 @@
 
 RB_GENERATE_STATIC(dircache_tree, dircache, dc_listentry, dc_cmp);
 
+#define DC_MTX(dc)		(&(dc)->dc_pool->dp_mtx)
+
+#define dc_lock(dc)		mtx_lock(DC_MTX(dc))
+
+#define dc_trylock(dc)		mtx_trylock(DC_MTX(dc))
+
+#define dc_unlock(dc)		mtx_unlock(DC_MTX(dc))
+
+#define dc_assertlock(dc, w)	mtx_assert(DC_MTX(dc), (w))
 
-static __inline void
-dc_lock(struct dircache *dc)
+static void
+dc_relock(struct dircache *from, struct dircache *to)
 {
-	mtx_lock(&dc->dc_pool->dp_mtx);
-}
+	dc_assertlock(from, MA_OWNED);
 
-static __inline int
-dc_trylock(struct dircache *dc)
-{
-	return (mtx_trylock(&dc->dc_pool->dp_mtx));
-}
+	if (from->dc_pool == to->dc_pool)
+		return;
 
-static __inline void
-dc_unlock(struct dircache *dc)
-{
-	mtx_unlock(&dc->dc_pool->dp_mtx);
-}
+	dc_assertlock(to, MA_NOTOWNED);
 
-static __inline void
-dc_assertlock(struct dircache *dc, int what)
-{
-	mtx_assert(&dc->dc_pool->dp_mtx, what);
+	dc_unlock(from);
+	dc_lock(to);
 }
 
 static void
-dc_interlock(struct dircache *a, struct dircache *b)
+dc_interlock(struct dircache *from, struct dircache *to)
 {
-	dc_assertlock(a, MA_OWNED);
+	dc_assertlock(from, MA_OWNED);
 
-	if (a->dc_pool == b->dc_pool)
+	if (from->dc_pool == to->dc_pool) {
+		dp_incstat(dps_interlock_same, to->dc_pool, 1);
 		return;
+	}
 
-	dc_assertlock(b, MA_NOTOWNED);
-	if ((uintptr_t)a->dc_pool < (uintptr_t)b->dc_pool) {
-		dc_lock(b);
-		dc_unlock(a);
+	dc_assertlock(to, MA_NOTOWNED);
+	if ((uintptr_t)from->dc_pool < (uintptr_t)to->dc_pool) {
+		dc_lock(to);
+		dc_unlock(from);
+		dp_incstat(dps_interlock_direct, to->dc_pool, 1);
 		return;
 	}
 
 	critical_enter();
-	if (dc_trylock(b) != 0) {
-		dc_unlock(a);
+	if (dc_trylock(to) != 0) {
+		dc_unlock(from);
 		critical_exit();
+		dp_incstat(dps_interlock_reverse_fast, to->dc_pool, 1);
 		return;
 	}
 	critical_exit();
 
 	/* !!!! FIXME !!!! */
-	dc_unlock(a);
-	dc_lock(b);
+	dc_unlock(from);
+	dc_lock(to);
+	dp_incstat(dps_interlock_reverse, to->dc_pool, 1);
 }
 
 static __inline void
@@ -184,16 +299,41 @@
 	dc->dc_namehash = hash32_buf(name, namelen, HASHINIT * namelen);
 }
 
+static __inline size_t
+dc_namebuflen(size_t namelen)
+{
+	return (roundup2(namelen + 1, DC_NAMEROUND));
+}
+
+static __inline int
+dc_namebuffits(struct dircache *dc, size_t namelen)
+{
+	return (dc_namebuflen(dc->dc_namelen) < namelen + 1);
+}
+
+static __inline char *
+dc_allocnamebuf(size_t namelen)
+{
+	char * buf;
+
+	buf = malloc(dc_namebuflen(namelen), M_DIRCACHE, M_WAITOK);
+	return (buf);
+}
+
 static __inline void
-dc_setname(struct dircache *dc, char *name, size_t namelen)
+dc_setname(struct dircache *dc, char *name, size_t namelen, char *namebuf)
 {
 	MPASS(name != dc->dc_name);
 
-	if (dc->dc_name == NULL || dc->dc_namelen < namelen) {
+	if (dc->dc_name == NULL || dc_namebuffits(dc, namelen) == 0) {
 		if (dc->dc_name != NULL)
 			free(dc->dc_name, M_DIRCACHE);
-		dc->dc_name = malloc(namelen + 1, M_DIRCACHE, M_WAITOK);
-	}
+		if (namebuf == NULL)
+			dc->dc_name = dc_allocnamebuf(namelen);
+		else
+			dc->dc_name = namebuf;
+	} else
+		MPASS(namebuf == NULL);
 	memcpy(dc->dc_name, name, namelen);
 	dc->dc_name[namelen] = '\0';
 	dc_initname(dc, dc->dc_name, namelen);
@@ -210,7 +350,6 @@
 	dc->dc_gen = *genp;
 }
 
-
 static struct dircache *
 dc_alloc(struct dircache *pdc, enum dircache_type type,
     char *name, size_t namelen)
@@ -220,22 +359,23 @@
 
 	dc = uma_zalloc(dircache_zone, M_WAITOK | M_ZERO);
 	DCDEBUG("alloc: %p %s\n", dc, name);
+
+	dc->dc_type = type;
+	dc->dc_flags = DC_CH_PARTIAL;
+	dc->dc_parent = pdc;
+	refcount_init(&dc->dc_refcnt, 1);
 	cv_init(&dc->dc_condvar, "dircache cv");
+
 	if (name != NULL && namelen != 0) {
-		dc_setname(dc, name, namelen);
-		dc->dc_parent = pdc;
+		dc_setname(dc, name, namelen, NULL);
 		/* cheaper way to get pseudo-random value */
 		poolind = dc->dc_namehash;
 	} else {
 		poolind = arc4random();
 	}
-
 	poolind %= pool_size;
 	dc->dc_pool = pool[poolind];
 
-	dc->dc_flags = DC_CH_PARTIAL;
-	refcount_init(&dc->dc_refcnt, 1);
-
 	return (dc);
 }
 
@@ -290,18 +430,6 @@
 }
 
 static void
-dc_refvnode_locked(struct dircache *dc, struct vnode *vp)
-{
-	dc_ref(dc);
-	MPASS(dc->dc_vnode == NULL);
-	dc->dc_vnode = vp;
-	TAILQ_INSERT_HEAD(&vp->v_dircache, dc, dc_vnodelist);
-	DCDEBUG("refvnode locked: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name,
-	    vp, dc->dc_refcnt);
-	MPASS(TAILQ_FIRST(&vp->v_dircache) == dc);
-}
-
-static void
 dc_refvnode(struct dircache *dc, struct vnode *vp)
 {
 	if (dc->dc_type != DT_ROOT)
@@ -309,57 +437,102 @@
 	DCDEBUG("refvnode: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name,
 	    vp, dc->dc_refcnt);
 
+	MPASS(vp->v_type != VNON && vp->v_type != VBAD);
+	MPASS(dc->dc_vnode == NULL);
 	dc_ref(dc);
-	MPASS(dc->dc_vnode == NULL);
 	dc->dc_vnode = vp;
-	dc_unlock(dc);
 	VI_LOCK(vp);
+	if (vp->v_type == VDIR && !TAILQ_EMPTY(&vp->v_dircache))
+		panic("dircache: multiple directory vnode references %p", vp);
 	TAILQ_INSERT_HEAD(&vp->v_dircache, dc, dc_vnodelist);
 	VI_UNLOCK(vp);
 }
 
 static void
-dc_relevnode(struct dircache *dc, struct vnode *vp)
+dc_relevnode(struct dircache *dc)
 {
 	MPASS(dc->dc_vnode != NULL);
+	dc_assertlock(dc, MA_OWNED);
 	DCDEBUG("relevnode: %p %s; vp=%p; refcnt=%d\n", dc, dc->dc_name,
-	    vp, dc->dc_refcnt);
+	    dc->dc_vnode, dc->dc_refcnt);
 
-	TAILQ_REMOVE(&vp->v_dircache, dc, dc_vnodelist);
+	VI_LOCK(dc->dc_vnode);
+	TAILQ_REMOVE(&dc->dc_vnode->v_dircache, dc, dc_vnodelist);
+	VI_UNLOCK(dc->dc_vnode);
 	dc->dc_vnode = NULL;
 	dc_unlock(dc);
 	dc_rele(dc);
 }
 
 static struct dircache *
-dc_getentry(struct vnode *vp, struct dircache *parent_hint,
-    struct nspace *nspace_hint, int flags)
+dc_getentry(struct vnode *vp, struct componentname *cnp, struct vnode *dvp)
 {
 	struct dircache *dc;
 
+restart:
 	VI_LOCK(vp);
 	dc = TAILQ_FIRST(&vp->v_dircache);
 	if (dc == NULL) {
+		VI_UNLOCK(vp);
 		if ((vp->v_vflag & VV_ROOT) != 0) {
 			dc = vp->v_mount->mnt_dircache;
 			DCDEBUG("getentry: root %p vp=%p\n", dc, vp);
 			MPASS(dc != NULL);
 			dc_lock(dc);
-			dc_refvnode_locked(dc, vp);
-			goto out;
+			dc_refvnode(dc, vp);
 		} else {
-			VI_UNLOCK(vp);
+#if 0
 			DCDEBUG("getentry: not found vp=%p\n", vp);
+#else
+			panic("dircache: entry not found for vnode %p\n", vp);
+#endif
 			return (NULL);
 		}
+	} else {
+		if (TAILQ_NEXT(dc, dc_vnodelist) != NULL) {
+			MPASS(cnp != NULL && dvp != NULL);
+			MPASS(vp->v_type != VDIR);
+			MPASS(!(cnp->cn_nameptr[0] == '.' &&
+			    (cnp->cn_namelen == 1 || (cnp->cn_namelen == 2 &&
+			    cnp->cn_nameptr[1] == '.'))));
+
+			for(; dc != NULL; dc = TAILQ_NEXT(dc, dc_vnodelist)) {
+				VI_UNLOCK(vp);
+				dc_lock(dc);
+				if (dc->dc_vnode != vp) {
+					dc_unlock(dc);
+					DCDEBUG("getenrty: restart; multiple entries; vp=%p\n",
+					    vp);
+					goto restart;
+				}
+				/* FIXME: dc_parent locking */
+				if (dc_cmpname(dc, cnp->cn_nameptr,
+				    cnp->cn_namelen) == 0 &&
+				    dvp == dc->dc_parent->dc_vnode)
+					break;
+				dc_unlock(dc);
+				VI_LOCK(vp);
+			}
+			if (dc == NULL) {
+				VI_UNLOCK(vp);
+#if 0
+				return (NULL);
+#else
+				panic("dircache: entry not found for vnode %p (multiple)\n", vp);
+#endif
+			}
+		} else {
+			VI_UNLOCK(vp);
+			dc_lock(dc);
+		}
 	}
-	MPASS(TAILQ_NEXT(dc, dc_vnodelist) == NULL);
-	dc_lock(dc);
 
-out:
-	if ((flags & DC_OP_LOCKVP) == 0)
-		VI_UNLOCK(vp);
-
+	dc_assertlock(dc, MA_OWNED);
+	if (dc->dc_vnode != vp) {
+		dc_unlock(dc);
+		DCDEBUG("getenrty: restart; vp=%p\n", vp);
+		goto restart;
+	}
 	return (dc);
 }
 
@@ -369,7 +542,7 @@
 	struct dircache key;
 	struct dircache *pdc, *dc;
 
-	pdc = dc_getentry(dvp, NULL, NULL, 0);
+	pdc = dc_getentry(dvp, NULL, NULL);
 	if (pdc == NULL)
 		return (NULL);
 	dc_assertlock(pdc, MA_OWNED);
@@ -381,15 +554,107 @@
 		return (NULL);
 	}
 
+	if ((flags & DC_OP_NOWAIT) == 0)
+		dc_wait(pdc);
+
 	dc_interlock(pdc, dc);
 	dc_assertlock(dc, MA_OWNED);
+	MPASS(dc->dc_parent == pdc);
+
+	return (dc);
+}
 
-	if ((flags & DC_OP_NOWAIT) == 0)
-		dc_wait(dc);
+static struct dircache *
+dc_update(struct dircache_cursor *curs, struct vnode *vp,
+    enum dircache_type type, char *name, size_t namelen,
+    ino_t inode, off_t offset, void *fspriv)
+{
+	struct dircache *dc, *pdc, *col;
+
+	pdc = curs->dcr_parent;
+	MPASS((pdc->dc_flags & DC_CH_UPDATING) != 0);
+
+	DCDEBUG("update: parent=%p name=%s\n", pdc, name);
+
+	dc = dc_alloc(pdc, type, name, namelen);
 
+	if (type == DT_WEAK)
+		curs->dcr_nflags |= DC_CH_HASWEAK;
+	dc->dc_fspriv = fspriv;
+	col = RB_INSERT(dircache_tree, &pdc->dc_children, dc);
+	if (col != NULL) {
+		if (type == col->dc_type) {
+			DCDEBUG("update: warn: same entry added: %s\n", dc->dc_name);
+			MPASS(col->dc_inode == inode && col->dc_offset == offset);
+			dc->dc_fspriv = NULL;
+			dc->dc_parent = NULL;
+			dc_rele(dc);
+			return (NULL);
+		} else if (col->dc_type == DT_NEGATIVE) {
+			DCDEBUG("update: replace negative entry: %p %s\n", dc, dc->dc_name);
+			dc_lock(col);
+			col->dc_type = type;
+			MPASS((col->dc_flags & DC_CH_COMPLETE) == 0);
+			col->dc_flags |= DC_CH_PARTIAL;
+			col->dc_inode = inode;
+			col->dc_offset = offset;
+			MPASS(col->dc_fspriv == NULL);
+			col->dc_fspriv = fspriv;
+			dc->dc_fspriv = NULL;
+			dc_unlock(col);
+			dc->dc_parent = NULL;
+			dc_rele(dc);
+			dc = col;
+		} else
+			panic("dircache: update: ivalid entry: %d %s\n",
+			    dc->dc_type, dc->dc_name);
+	} else
+		dc_ref(pdc);
+	if (vp != NULL) {
+		dc_lock(dc);
+		dc_refvnode(dc, vp);
+		dc_unlock(dc);
+	}
 	return (dc);
 }
 
+static void
+dc_removeentry(struct dircache *dc)
+{
+	struct dircache *parent;
+	MPASS(dc->dc_parent != NULL);
+
+	dc->dc_fspriv = NULL;
+	dc->dc_type = DT_INVAL;
+	parent = dc->dc_parent;
+	dc->dc_parent = NULL;
+	RB_REMOVE(dircache_tree, &parent->dc_children, dc);
+	if (dc->dc_vnode != NULL)
+		dc_relevnode(dc);
+	else
+		dc_unlock(dc);
+	dc_rele(parent);
+	dc_rele(dc);
+}
+
+static void
+dc_marknegative(struct dircache *dc)
+{
+	DCDEBUG("mark negative: %p %s; vp=%p\n", dc, dc->dc_name, dc->dc_vnode);
+	dc->dc_inode = 0;
+	dc->dc_offset = 0;
+	dc->dc_fspriv = NULL;
+	dc->dc_type = DT_NEGATIVE;
+	dc->dc_flags &= ~DC_CH_COMPLETE;
+	dc->dc_flags |= DC_CH_PARTIAL;
+	dc_updategen(dc);
+	if (dc->dc_vnode != NULL)
+		dc_relevnode(dc);
+	else
+		dc_unlock(dc);
+	dc_assertlock(dc, MA_NOTOWNED);
+}
+
 void
 dircache_init(struct mount *mp, ino_t inode)
 {
@@ -416,15 +681,22 @@
 {
 	struct dircache *dc, *ch, *tmp;
 
+restart:
 	VI_LOCK(vp);
 	TAILQ_FOREACH(dc, &vp->v_dircache, dc_vnodelist) {
 		DCDEBUG("purge negative: %p %s; vp=%p\n", dc, dc->dc_name, vp);
+		VI_UNLOCK(vp);
 		dc_lock(dc);
+		if (dc->dc_vnode != vp) {
+			dc_unlock(dc);
+			goto restart;
+		}
 		RB_FOREACH_SAFE(ch, dircache_tree, &dc->dc_children, tmp) {
-			if (dc->dc_type == DT_NEGATIVE)
+			if (ch->dc_type == DT_NEGATIVE)
 				RB_REMOVE(dircache_tree, &dc->dc_children,
 				    ch);
 		}
+		VI_LOCK(vp);
 		dc_unlock(dc);
 	}
 	VI_UNLOCK(vp);
@@ -434,29 +706,7 @@
 dircache_update(struct dircache_cursor *curs, enum dircache_type type,
     char *name, size_t namelen, ino_t inode, off_t offset)
 {
-	struct dircache *dc, *pdc, *col;
-
-	pdc = curs->dcr_parent;
-	MPASS((pdc->dc_flags & DC_CH_UPDATING) != 0);
-
-	DCDEBUG("update: parent=%p name=%s\n", pdc, name);
-
-	dc = dc_alloc(pdc, type, name, namelen);
-
-	if (type == DT_WEAK)
-		curs->dcr_nflags |= DC_CH_HASWEAK;
-	col = RB_INSERT(dircache_tree, &pdc->dc_children, dc);
-	if (col == NULL)
-		dc_ref(pdc);
-	else {
-		if (type == col->dc_type) {
-			DCDEBUG("update: warn: same entry added: %s\n", dc->dc_name);
-			dc_free(dc);
-			return;
-		}
-		panic("dircache: unexpected entry during update: %d %s\n",
-		    dc->dc_type, dc->dc_name);
-	}
+	dc_update(curs, NULL, type, name, namelen, inode, offset, NULL);
 }
 
 int
@@ -468,7 +718,7 @@
 
 
 	curs->dcr_parent = NULL;
-	dc = dc_getentry(dvp, NULL, NULL, 0);
+	dc = dc_getentry(dvp, NULL, NULL);
 	if (dc == NULL) {
 		DCDEBUG("beginupdate: not found dvp=%p; path=%s\n",
 		    dvp, cnp->cn_nameptr);
@@ -542,32 +792,100 @@
 	dircache_endupdate(curs, DC_CH_PARTIAL);
 }
 
+static int
+dircache_lookupdot(struct vnode *dvp, struct vnode **vpp,
+    struct componentname *cnp)
+{
+	int ltype;
+
+	MPASS(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.');
+
+	DCDEBUG("lookup dot: dvp=%p\n", dvp);
+
+	*vpp = dvp;
+	vref(*vpp);
+	/*
+	 * When we lookup "." we still can be asked to lock it
+	 * differently.
+	 */
+	ltype = cnp->cn_lkflags & LK_TYPE_MASK;
+	if (ltype != VOP_ISLOCKED(*vpp)) {
+		if (ltype == LK_EXCLUSIVE) {
+			vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
+			if ((*vpp)->v_iflag & VI_DOOMED) {
+				/* forced unmount */
+				vrele(*vpp);
+				*vpp = NULL;
+				return (ENOENT);
+			}
+		} else
+			vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
+	}
+	return (-1);
+}
+
 int
 dircache_lookup(struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp)
 {
-	struct dircache *dc;
-	int error;
+	struct dircache *pdc, *dc;
+	int error, ltype;
 
-	dc = dc_find(dvp, cnp, 0);
+	if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1)
+		return (dircache_lookupdot(dvp, vpp, cnp));
+	else if (cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.' &&
+	    cnp->cn_namelen == 2) {
+		MPASS((cnp->cn_flags & ISDOTDOT) != 0);
+		pdc = dc_getentry(dvp, NULL, NULL);
+		if (pdc != NULL) {
+			DCDEBUG("lookup dotdot: dvp=%p\n", dvp);
+			dc = pdc->dc_parent;
+			dc_interlock(pdc, dc);
+		} else
+			dc = NULL;
+	} else
+		dc = dc_find(dvp, cnp, 0);
 	if (dc == NULL) {
-		DCDEBUG("lookup: not found: %s; dvp=%p\n", cnp->cn_nameptr, dvp);
+		DCDEBUG("lookup: not found: %s; dvp=%p; op=%ld\n",
+		    cnp->cn_nameptr, dvp, cnp->cn_nameiop);
 		return (0);
 	}
 	error = 0;
 	if (dc->dc_type == DT_NEGATIVE)
-		error = ENOENT;
+		switch (cnp->cn_nameiop) {
+		case CREATE:
+		case RENAME:
+			error = 0;
+			break;
+		default:
+			error = ENOENT;
+		}
 	else if (dc->dc_vnode != NULL) {
 		*vpp = dc->dc_vnode;
 		error = -1;
 	}
 	dc_unlock(dc);
-	DCDEBUG("lookup: error=%d: %p %s; dvp=%p\n", error, dc, dc->dc_name, dvp);
+	DCDEBUG("lookup: error=%d: %p %s; dvp=%p; op=%ld\n", error, dc,
+	    dc->dc_name, dvp, cnp->cn_nameiop);
 	if (error == -1) {
+		ltype = 0;
+		if ((cnp->cn_flags & ISDOTDOT) != 0) {
+			ltype = VOP_ISLOCKED(dvp);
+			VOP_UNLOCK(dvp, 0);
+		}
 		if (vget(*vpp, cnp->cn_lkflags, cnp->cn_thread) != 0) {
 			*vpp = NULL;
 			error = 0;
 		}
+		if (cnp->cn_flags & ISDOTDOT) {
+			vn_lock(dvp, ltype | LK_RETRY);
+			if (dvp->v_iflag & VI_DOOMED) {
+				if (error == 0)
+					vput(*vpp);
+				*vpp = NULL;
+				return (ENOENT);
+			}
+		}
 	}
 	return (error);
 }
@@ -583,23 +901,12 @@
 	DCDEBUG("add: inode=%d %s; vp=%p\n", inode, cnp->cn_nameptr, vp);
 	if (dircache_beginupdate(&curs, dvp, cnp, 0) != 0)
 		return (ENOENT);
-	dircache_update(&curs, type, cnp->cn_nameptr, cnp->cn_namelen,
-	    inode, offset);
+	dc_update(&curs, vp, type, cnp->cn_nameptr, cnp->cn_namelen,
+	    inode, offset, NULL);
 	dircache_endupdate(&curs, 0);
 	return (0);
 }
 
-static void
-dc_remove(struct dircache *dc, struct vnode *vp)
-{
-	dc->dc_inode = 0;
-	dc->dc_offset = 0;
-	dc->dc_fspriv = NULL;
-	dc->dc_type = DT_NEGATIVE;
-	dc_updategen(dc);
-	dc_relevnode(dc, vp);
-}
-
 int
 dircache_remove(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 {
@@ -607,16 +914,18 @@
 	struct dircache *dc;
 
 	DCDEBUG("remove: %s; vp=%p\n", cnp->cn_nameptr, vp);
-	if (dircache_beginupdate(&curs, dvp, cnp, 0) == 0)
+	if (dircache_beginupdate(&curs, dvp, cnp, 0) != 0)
 		return (ENOENT);
 
-	dc = dc_getentry(vp, curs.dcr_parent, NULL, DC_OP_LOCKVP);
+	dc = dc_getentry(vp, cnp, dvp);
 	if (dc == NULL) {
+		DCDEBUG("remove: vp not found: %s vp=%p\n", cnp->cn_nameptr,
+		    vp);
 		MPASS(dc_find(dvp, cnp, DC_OP_NOWAIT) == 0);
 		dircache_endupdate(&curs, 0);
 		return (ENOENT);
 	}
-	dc_remove(dc, vp);
+	dc_marknegative(dc);
 	dircache_endupdate(&curs, 0);
 	return (0);
 }
@@ -628,6 +937,7 @@
 {
 	struct dircache_cursor fcr, tcr;
 	struct dircache *fdc, *tdc, *col;
+	char *namebuf;
 	int error;
 
 	MPASS(fvp != tvp);
@@ -638,48 +948,83 @@
 	if (fdvp == tdvp) {
 		if (tcr.dcr_parent == NULL)
 			return (ENOENT);
+		fcr.dcr_parent = tcr.dcr_parent;
 	} else {
 		dircache_beginupdate(&fcr, fdvp, fcnp, 0);
+#if 0
 		if (tcr.dcr_parent == NULL && fcr.dcr_parent == NULL) {
-			MPASS(dc_getentry(fvp, NULL, NULL, 0) == NULL);
+			MPASS(dc_getentry(fvp, fcnp) == NULL);
 			MPASS(tvp == NULL ||
-			    dc_getentry(tvp, NULL, NULL, 0) == NULL);
+			    dc_getentry(tvp, NULL) == NULL);
 			return (ENOENT);
 		} else if (tcr.dcr_parent == NULL) {
-			error = dircache_remove(fdvp, fvp, fcnp);
+			fdc = dc_getentry(fvp, fcnp);
+			if (fdc != NULL) {
+				dc_marknegative(fdc, fvp);
+			} else {
+				MPASS(dc_find(fdvp, fcnp, DC_OP_NOWAIT) == 0);
+				error = ENOENT;
+			}
 			dircache_endupdate(&fcr, 0);
 			return (error);
 		} else if (fcr.dcr_parent == NULL) {
 			dircache_partialupdate(&tcr);
 			return (0);
 		}
+#else
+		MPASS(tcr.dcr_parent != NULL && fcr.dcr_parent != NULL);
+#endif
 	}
 
 	if (tvp != NULL) {
-		tdc = dc_getentry(tvp, tcr.dcr_parent, NULL, DC_OP_LOCKVP);
+		tdc = dc_getentry(tvp, tcnp, tdvp);
 		if (tdc != NULL) {
-			MPASS(fcr.dcr_parent == tdc->dc_parent);
-			dc_remove(tdc, tvp);
+			MPASS(tcr.dcr_parent == tdc->dc_parent);
+			dc_removeentry(tdc);
 		} else
 			MPASS(dc_find(tdvp, tcnp, DC_OP_NOWAIT) == 0);
 	}
-	fdc = dc_getentry(fvp, fcr.dcr_parent, NULL, DC_OP_LOCKVP);
+	fdc = dc_getentry(fvp, fcnp, tdvp);
 	if (fdc == NULL) {
 		MPASS(dc_find(fdvp, fcnp, DC_OP_NOWAIT) == 0);
 		error = ENOENT;
 		goto out;
 	}
+	DCDEBUG("rename: remove from tree: %p %s; parent=%p\n", fdc,
+	    fdc->dc_name, fcr.dcr_parent);
+	RB_REMOVE(dircache_tree, &fcr.dcr_parent->dc_children, fdc);
+	DCDEBUG("rename: rename: %p %s\n", fdc, fdc->dc_name);
+	namebuf = NULL;
+	if (dc_namebuffits(fdc, tcnp->cn_namelen) == 0) {
+		MPASS(fdc->dc_namelen > 0 && fdc->dc_name != NULL);
+		dc_unlock(fdc);
+		namebuf = dc_allocnamebuf(tcnp->cn_namelen);
+		dc_lock(fdc);
+	}
+	dc_setname(fdc, tcnp->cn_nameptr, tcnp->cn_namelen, namebuf);
 	dc_updategen(fdc);
-	RB_REMOVE(dircache_tree, &fcr.dcr_parent->dc_children, fdc);
-	dc_setname(fdc, tcnp->cn_nameptr, tcnp->cn_namelen);
+	DCDEBUG("rename: insert to tree: %p %s; parent=%p\n", fdc, fdc->dc_name,
+	    tcr.dcr_parent);
+reinsert:
 	col = RB_INSERT(dircache_tree, &tcr.dcr_parent->dc_children, fdc);
-	MPASS(col == NULL);
-	if (fcr.dcr_parent != tcr.dcr_parent) {
+	if (col != NULL) {
+		DCDEBUG("rename: insert collision: %p %s; type=%d\n", col,
+		    col->dc_name, col->dc_type);
+		if (col->dc_type != DT_NEGATIVE)
+			panic("dircache: rename: invalid entry: %d %s\n",
+			    col->dc_type, col->dc_name);
+		dc_relock(fdc, col);
+		dc_removeentry(col);
+		dc_lock(fdc);
+		goto reinsert;
+	}
+	if (fdvp != tdvp) {
 		dc_ref(tcr.dcr_parent);
+		fdc->dc_parent = tcr.dcr_parent;
+		dc_unlock(fdc);
 		dc_rele(fcr.dcr_parent);
-		fdc->dc_parent = tcr.dcr_parent;
-	}
-	dc_unlock(fdc);
+	} else
+		dc_unlock(fdc);
 
 out:
 	dircache_endupdate(&tcr, 0);
@@ -694,10 +1039,14 @@
 {
 	struct dircache_cursor curs;
 
+	if (cnp->cn_nameptr[0] == '.' && (cnp->cn_namelen == 1 ||
+	    (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.')))
+		panic("dircache: set negative for '.' or '..'");
+
 	if (dircache_beginupdate(&curs, dvp, cnp, 0) != 0)
 		return (ENOENT);
-	dircache_update(&curs, DT_NEGATIVE, cnp->cn_nameptr, cnp->cn_namelen,
-	    0, 0);
+	dc_update(&curs, NULL, DT_NEGATIVE, cnp->cn_nameptr, cnp->cn_namelen,
+	    0, 0, NULL);
 	dircache_endupdate(&curs, 0);
 	return (0);
 }
@@ -708,19 +1057,29 @@
 {
 	struct dircache *dc;
 
-	dc = dc_getentry(vp, NULL, NULL, 0);
+	dc = dc_getentry(vp, cnp, dvp);
 	if (dc != NULL) {
-		MPASS(dc->dc_namelen == cnp->cn_namelen &&
-		    bcmp(dc->dc_name, cnp->cn_nameptr, dc->dc_namelen) == 0);
+		DCDEBUG("setvnode found entry: %p %s; type=%d; vp=%p; cnp=%d %s\n",
+		    dc, dc ? dc->dc_name : "??", dc->dc_type, vp,
+		    (int)cnp->cn_namelen, cnp->cn_nameptr);
+		MPASS(dc->dc_type == DT_ROOT ||
+		    (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) ||
+		    ((dc->dc_namelen == cnp->cn_namelen) &&

>>> TRUNCATED FOR MAIL (1000 lines) <<<


More information about the p4-projects mailing list