svn commit: r245115 - head/sys/fs/tmpfs
Gleb Kurtsou
gleb at FreeBSD.org
Sun Jan 6 22:15:45 UTC 2013
Author: gleb
Date: Sun Jan 6 22:15:44 2013
New Revision: 245115
URL: http://svnweb.freebsd.org/changeset/base/245115
Log:
tmpfs: Replace directory entry linked list with RB-Tree.
Use file name hash as a tree key, handle duplicate keys. Both VOP_LOOKUP
and VOP_READDIR operations utilize same tree for search. Directory
entry offset (cookie) is either file name hash or incremental id in case
of hash collisions (duplicate-cookies). Keep sorted per directory list
of duplicate-cookie entries to facilitate cookie number allocation.
Don't fail if previous VOP_READDIR() offset is no longer valid, start
with next dirent instead. Other file system handle it similarly.
Workaround race prone tn_readdir_last[pn] fields update.
Add tmpfs_dir_destroy() to free all dirents.
Set NFS cookies in tmpfs_dir_getdents(). Return EJUSTRETURN from
tmpfs_dir_getdents() instead of hard coded -1.
Mark directory traversal routines static as they are no longer
used outside of tmpfs_subr.c
Modified:
head/sys/fs/tmpfs/tmpfs.h
head/sys/fs/tmpfs/tmpfs_subr.c
head/sys/fs/tmpfs/tmpfs_vfsops.c
head/sys/fs/tmpfs/tmpfs_vnops.c
Modified: head/sys/fs/tmpfs/tmpfs.h
==============================================================================
--- head/sys/fs/tmpfs/tmpfs.h Sun Jan 6 21:56:58 2013 (r245114)
+++ head/sys/fs/tmpfs/tmpfs.h Sun Jan 6 22:15:44 2013 (r245115)
@@ -49,6 +49,7 @@
/* --------------------------------------------------------------------- */
#include <sys/malloc.h>
#include <sys/systm.h>
+#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <vm/swap_pager.h>
@@ -60,104 +61,81 @@ MALLOC_DECLARE(M_TMPFSNAME);
/*
* Internal representation of a tmpfs directory entry.
*/
+
+LIST_HEAD(tmpfs_dir_duphead, tmpfs_dirent);
+
struct tmpfs_dirent {
- TAILQ_ENTRY(tmpfs_dirent) td_entries;
+ /*
+ * Depending on td_cookie flag entry can be of 3 types:
+ * - regular -- no hash collisions, stored in RB-Tree
+ * - duphead -- synthetic linked list head for dup entries
+ * - dup -- stored in linked list instead of RB-Tree
+ */
+ union {
+ /* regular and duphead entry types */
+ RB_ENTRY(tmpfs_dirent) td_entries;
- /* Length of the name stored in this directory entry. This avoids
- * the need to recalculate it every time the name is used. */
- uint16_t td_namelen;
-
- /* The name of the entry, allocated from a string pool. This
- * string is not required to be zero-terminated; therefore, the
- * td_namelen field must always be used when accessing its value. */
- char * td_name;
+ /* dup entry type */
+ struct {
+ LIST_ENTRY(tmpfs_dirent) entries;
+ LIST_ENTRY(tmpfs_dirent) index_entries;
+ } td_dup;
+ } uh;
+
+ uint32_t td_cookie;
+ uint32_t td_hash;
+ u_int td_namelen;
/* Pointer to the node this entry refers to. In case this field
* is NULL, the node is a whiteout. */
struct tmpfs_node * td_node;
+
+ union {
+ /*
+ * The name of the entry, allocated from a string pool. This
+ * string is not required to be zero-terminated.
+ */
+ char * td_name; /* regular, dup */
+ struct tmpfs_dir_duphead td_duphead; /* duphead */
+ } ud;
};
-/* A directory in tmpfs holds a sorted list of directory entries, which in
+/* A directory in tmpfs holds a list of directory entries, which in
* turn point to other files (which can be directories themselves).
*
- * In tmpfs, this list is managed by a tail queue, whose head is defined by
+ * In tmpfs, this list is managed by a RB-Tree, whose head is defined by
* the struct tmpfs_dir type.
*
- * It is imporant to notice that directories do not have entries for . and
+ * It is important to notice that directories do not have entries for . and
* .. as other file systems do. These can be generated when requested
* based on information available by other means, such as the pointer to
* the node itself in the former case or the pointer to the parent directory
* in the latter case. This is done to simplify tmpfs's code and, more
* importantly, to remove redundancy. */
-TAILQ_HEAD(tmpfs_dir, tmpfs_dirent);
+RB_HEAD(tmpfs_dir, tmpfs_dirent);
/* Each entry in a directory has a cookie that identifies it. Cookies
* supersede offsets within directories because, given how tmpfs stores
- * directories in memory, there is no such thing as an offset. (Emulating
- * a real offset could be very difficult.)
- *
+ * directories in memory, there is no such thing as an offset.
+ *
* The '.', '..' and the end of directory markers have fixed cookies which
* cannot collide with the cookies generated by other entries. The cookies
- * fot the other entries are generated based on the memory address on which
- * stores their information is stored.
- *
- * Ideally, using the entry's memory pointer as the cookie would be enough
- * to represent it and it wouldn't cause collisions in any system.
- * Unfortunately, this results in "offsets" with very large values which
- * later raise problems in the Linux compatibility layer (and maybe in other
- * places) as described in PR kern/32034. Hence we need to workaround this
- * with a rather ugly hack.
- *
- * Linux 32-bit binaries, unless built with _FILE_OFFSET_BITS=64, have off_t
- * set to 'long', which is a 32-bit *signed* long integer. Regardless of
- * the macro value, GLIBC (2.3 at least) always uses the getdents64
- * system call (when calling readdir) which internally returns off64_t
- * offsets. In order to make 32-bit binaries work, *GLIBC* converts the
- * 64-bit values returned by the kernel to 32-bit ones and aborts with
- * EOVERFLOW if the conversion results in values that won't fit in 32-bit
- * integers (which it assumes is because the directory is extremely large).
- * This wouldn't cause problems if we were dealing with unsigned integers,
- * but as we have signed integers, this check fails due to sign expansion.
+ * for the other entries are generated based on the file name hash value or
+ * unique number in case of name hash collision.
*
- * For example, consider that the kernel returns the 0xc1234567 cookie to
- * userspace in a off64_t integer. Later on, GLIBC casts this value to
- * off_t (remember, signed) with code similar to:
- * system call returns the offset in kernel_value;
- * off_t casted_value = kernel_value;
- * if (sizeof(off_t) != sizeof(off64_t) &&
- * kernel_value != casted_value)
- * error!
- * In this case, casted_value still has 0xc1234567, but when it is compared
- * for equality against kernel_value, it is promoted to a 64-bit integer and
- * becomes 0xffffffffc1234567, which is different than 0x00000000c1234567.
- * Then, GLIBC assumes this is because the directory is very large.
- *
- * Given that all the above happens in user-space, we have no control over
- * it; therefore we must workaround the issue here. We do this by
- * truncating the pointer value to a 32-bit integer and hope that there
- * won't be collisions. In fact, this will not cause any problems in
- * 32-bit platforms but some might arise in 64-bit machines (I'm not sure
- * if they can happen at all in practice).
- *
- * XXX A nicer solution shall be attempted. */
-#ifdef _KERNEL
-#define TMPFS_DIRCOOKIE_DOT 0
-#define TMPFS_DIRCOOKIE_DOTDOT 1
-#define TMPFS_DIRCOOKIE_EOF 2
-static __inline
-off_t
-tmpfs_dircookie(struct tmpfs_dirent *de)
-{
- off_t cookie;
-
- cookie = ((off_t)(uintptr_t)de >> 1) & 0x7FFFFFFF;
- MPASS(cookie != TMPFS_DIRCOOKIE_DOT);
- MPASS(cookie != TMPFS_DIRCOOKIE_DOTDOT);
- MPASS(cookie != TMPFS_DIRCOOKIE_EOF);
+ * To preserve compatibility cookies are limited to 31 bits.
+ */
- return cookie;
-}
-#endif
+#define TMPFS_DIRCOOKIE_DOT 0
+#define TMPFS_DIRCOOKIE_DOTDOT 1
+#define TMPFS_DIRCOOKIE_EOF 2
+#define TMPFS_DIRCOOKIE_MASK ((off_t)0x3fffffffU)
+#define TMPFS_DIRCOOKIE_MIN ((off_t)0x00000004U)
+#define TMPFS_DIRCOOKIE_DUP ((off_t)0x40000000U)
+#define TMPFS_DIRCOOKIE_DUPHEAD ((off_t)0x80000000U)
+#define TMPFS_DIRCOOKIE_DUP_MIN TMPFS_DIRCOOKIE_DUP
+#define TMPFS_DIRCOOKIE_DUP_MAX \
+ (TMPFS_DIRCOOKIE_DUP | TMPFS_DIRCOOKIE_MASK)
/* --------------------------------------------------------------------- */
@@ -243,29 +221,31 @@ struct tmpfs_node {
dev_t tn_rdev;
/* Valid when tn_type == VDIR. */
- struct tn_dir{
+ struct tn_dir {
/* Pointer to the parent directory. The root
* directory has a pointer to itself in this field;
* this property identifies the root node. */
struct tmpfs_node * tn_parent;
- /* Head of a tail-queue that links the contents of
- * the directory together. See above for a
- * description of its contents. */
+ /* Head of a tree that links the contents of
+ * the directory together. */
struct tmpfs_dir tn_dirhead;
+ /* Head of a list the contains fake directory entries
+ * heads, i.e. entries with TMPFS_DIRCOOKIE_DUPHEAD
+ * flag. */
+ struct tmpfs_dir_duphead tn_dupindex;
+
/* Number and pointer of the first directory entry
* returned by the readdir operation if it were
* called again to continue reading data from the
* same directory as before. This is used to speed
* up reads of long directories, assuming that no
* more than one read is in progress at a given time.
- * Otherwise, these values are discarded and a linear
- * scan is performed from the beginning up to the
- * point where readdir starts returning values. */
+ * Otherwise, these values are discarded. */
off_t tn_readdir_lastn;
struct tmpfs_dirent * tn_readdir_lastp;
- }tn_dir;
+ } tn_dir;
/* Valid when tn_type == VLNK. */
/* The link's target, allocated from a string pool. */
@@ -419,9 +399,9 @@ int tmpfs_alloc_node(struct tmpfs_mount
char *, dev_t, struct tmpfs_node **);
void tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *);
int tmpfs_alloc_dirent(struct tmpfs_mount *, struct tmpfs_node *,
- const char *, uint16_t, struct tmpfs_dirent **);
-void tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *,
- boolean_t);
+ const char *, u_int, struct tmpfs_dirent **);
+void tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *);
+void tmpfs_dirent_init(struct tmpfs_dirent *, const char *, u_int);
int tmpfs_alloc_vp(struct mount *, struct tmpfs_node *, int,
struct vnode **);
void tmpfs_free_vp(struct vnode *);
@@ -429,13 +409,12 @@ int tmpfs_alloc_file(struct vnode *, str
struct componentname *, char *);
void tmpfs_dir_attach(struct vnode *, struct tmpfs_dirent *);
void tmpfs_dir_detach(struct vnode *, struct tmpfs_dirent *);
+void tmpfs_dir_destroy(struct tmpfs_mount *, struct tmpfs_node *);
struct tmpfs_dirent * tmpfs_dir_lookup(struct tmpfs_node *node,
struct tmpfs_node *f,
struct componentname *cnp);
-int tmpfs_dir_getdotdent(struct tmpfs_node *, struct uio *);
-int tmpfs_dir_getdotdotdent(struct tmpfs_node *, struct uio *);
-struct tmpfs_dirent * tmpfs_dir_lookupbycookie(struct tmpfs_node *, off_t);
-int tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, off_t *);
+int tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, int,
+ u_long *, int *);
int tmpfs_dir_whiteout_add(struct vnode *, struct componentname *);
void tmpfs_dir_whiteout_remove(struct vnode *, struct componentname *);
int tmpfs_reg_resize(struct vnode *, off_t, boolean_t);
@@ -467,8 +446,8 @@ int tmpfs_truncate(struct vnode *, off_t
* with a length of 'len'.
*/
#define TMPFS_DIRENT_MATCHES(de, name, len) \
- (de->td_namelen == (uint16_t)len && \
- bcmp((de)->td_name, (name), (de)->td_namelen) == 0)
+ (de->td_namelen == len && \
+ bcmp((de)->ud.td_name, (name), (de)->td_namelen) == 0)
/* --------------------------------------------------------------------- */
@@ -476,11 +455,10 @@ int tmpfs_truncate(struct vnode *, off_t
* Ensures that the node pointed by 'node' is a directory and that its
* contents are consistent with respect to directories.
*/
-#define TMPFS_VALIDATE_DIR(node) \
- MPASS((node)->tn_type == VDIR); \
- MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
- MPASS((node)->tn_dir.tn_readdir_lastp == NULL || \
- tmpfs_dircookie((node)->tn_dir.tn_readdir_lastp) == (node)->tn_dir.tn_readdir_lastn);
+#define TMPFS_VALIDATE_DIR(node) do { \
+ MPASS((node)->tn_type == VDIR); \
+ MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
+} while (0)
/* --------------------------------------------------------------------- */
Modified: head/sys/fs/tmpfs/tmpfs_subr.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_subr.c Sun Jan 6 21:56:58 2013 (r245114)
+++ head/sys/fs/tmpfs/tmpfs_subr.c Sun Jan 6 22:15:44 2013 (r245115)
@@ -37,6 +37,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/fnv_hash.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
@@ -58,6 +59,11 @@ __FBSDID("$FreeBSD$");
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_vnops.h>
+struct tmpfs_dir_cursor {
+ struct tmpfs_dirent *tdc_current;
+ struct tmpfs_dirent *tdc_tree;
+};
+
SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs file system");
static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED;
@@ -87,6 +93,10 @@ SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory
&tmpfs_pages_reserved, 0, sysctl_mem_reserved, "L",
"Amount of available memory and swap below which tmpfs growth stops");
+static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a,
+ struct tmpfs_dirent *b);
+RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
+
size_t
tmpfs_mem_avail(void)
{
@@ -188,7 +198,8 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp
break;
case VDIR:
- TAILQ_INIT(&nnode->tn_dir.tn_dirhead);
+ RB_INIT(&nnode->tn_dir.tn_dirhead);
+ LIST_INIT(&nnode->tn_dir.tn_dupindex);
MPASS(parent != nnode);
MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
@@ -309,6 +320,49 @@ tmpfs_free_node(struct tmpfs_mount *tmp,
/* --------------------------------------------------------------------- */
+static __inline uint32_t
+tmpfs_dirent_hash(const char *name, u_int len)
+{
+ uint32_t hash;
+
+ hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK;
+#ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
+ hash &= 0xf;
+#endif
+ if (hash < TMPFS_DIRCOOKIE_MIN)
+ hash += TMPFS_DIRCOOKIE_MIN;
+
+ return (hash);
+}
+
+static __inline off_t
+tmpfs_dirent_cookie(struct tmpfs_dirent *de)
+{
+ MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);
+
+ return (de->td_cookie);
+}
+
+static __inline boolean_t
+tmpfs_dirent_dup(struct tmpfs_dirent *de)
+{
+ return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
+}
+
+static __inline boolean_t
+tmpfs_dirent_duphead(struct tmpfs_dirent *de)
+{
+ return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
+}
+
+void
+tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
+{
+ de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
+ memcpy(de->ud.td_name, name, namelen);
+ de->td_namelen = namelen;
+}
+
/*
* Allocates a new directory entry for the node node with a name of name.
* The new directory entry is returned in *de.
@@ -320,17 +374,17 @@ tmpfs_free_node(struct tmpfs_mount *tmp,
*/
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
- const char *name, uint16_t len, struct tmpfs_dirent **de)
+ const char *name, u_int len, struct tmpfs_dirent **de)
{
struct tmpfs_dirent *nde;
- nde = (struct tmpfs_dirent *)uma_zalloc(
- tmp->tm_dirent_pool, M_WAITOK);
- nde->td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
- nde->td_namelen = len;
- memcpy(nde->td_name, name, len);
-
+ nde = uma_zalloc(tmp->tm_dirent_pool, M_WAITOK);
nde->td_node = node;
+ if (name != NULL) {
+ nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
+ tmpfs_dirent_init(nde, name, len);
+ } else
+ nde->td_namelen = 0;
if (node != NULL)
node->tn_links++;
@@ -351,20 +405,17 @@ tmpfs_alloc_dirent(struct tmpfs_mount *t
* directory entry, as it may already have been released from the outside.
*/
void
-tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
- boolean_t node_exists)
+tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
{
- if (node_exists) {
- struct tmpfs_node *node;
+ struct tmpfs_node *node;
- node = de->td_node;
- if (node != NULL) {
- MPASS(node->tn_links > 0);
- node->tn_links--;
- }
+ node = de->td_node;
+ if (node != NULL) {
+ MPASS(node->tn_links > 0);
+ node->tn_links--;
}
-
- free(de->td_name, M_TMPFSNAME);
+ if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
+ free(de->ud.td_name, M_TMPFSNAME);
uma_zfree(tmp->tm_dirent_pool, de);
}
@@ -586,7 +637,7 @@ tmpfs_alloc_file(struct vnode *dvp, stru
/* Allocate a vnode for the new file. */
error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
if (error != 0) {
- tmpfs_free_dirent(tmp, de, TRUE);
+ tmpfs_free_dirent(tmp, de);
tmpfs_free_node(tmp, node);
goto out;
}
@@ -605,6 +656,215 @@ out:
/* --------------------------------------------------------------------- */
+static struct tmpfs_dirent *
+tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
+{
+ struct tmpfs_dirent *de;
+
+ de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
+ dc->tdc_tree = de;
+ if (de != NULL && tmpfs_dirent_duphead(de))
+ de = LIST_FIRST(&de->ud.td_duphead);
+ dc->tdc_current = de;
+
+ return (dc->tdc_current);
+}
+
+static struct tmpfs_dirent *
+tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
+{
+ struct tmpfs_dirent *de;
+
+ MPASS(dc->tdc_tree != NULL);
+ if (tmpfs_dirent_dup(dc->tdc_current)) {
+ dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries);
+ if (dc->tdc_current != NULL)
+ return (dc->tdc_current);
+ }
+ dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
+ &dnode->tn_dir.tn_dirhead, dc->tdc_tree);
+ if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
+ dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
+ MPASS(dc->tdc_current != NULL);
+ }
+
+ return (dc->tdc_current);
+}
+
+/* Lookup directory entry in RB-Tree. Function may return duphead entry. */
+static struct tmpfs_dirent *
+tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
+{
+ struct tmpfs_dirent *de, dekey;
+
+ dekey.td_hash = hash;
+ de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
+ return (de);
+}
+
+/* Lookup directory entry by cookie, initialize directory cursor accordingly. */
+static struct tmpfs_dirent *
+tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
+ struct tmpfs_dir_cursor *dc)
+{
+ struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
+ struct tmpfs_dirent *de, dekey;
+
+ MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);
+
+ if (cookie == node->tn_dir.tn_readdir_lastn &&
+ (de = node->tn_dir.tn_readdir_lastp) != NULL) {
+ /* Protect against possible race, tn_readdir_last[pn]
+ * may be updated with only shared vnode lock held. */
+ if (cookie == tmpfs_dirent_cookie(de))
+ goto out;
+ }
+
+ if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
+ LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
+ uh.td_dup.index_entries) {
+ MPASS(tmpfs_dirent_dup(de));
+ if (de->td_cookie == cookie)
+ goto out;
+ /* dupindex list is sorted. */
+ if (de->td_cookie < cookie) {
+ de = NULL;
+ goto out;
+ }
+ }
+ MPASS(de == NULL);
+ goto out;
+ }
+
+ MPASS((cookie & TMPFS_DIRCOOKIE_MASK) == cookie);
+ dekey.td_hash = cookie;
+ /* Recover if direntry for cookie was removed */
+ de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
+ dc->tdc_tree = de;
+ dc->tdc_current = de;
+ if (de != NULL && tmpfs_dirent_duphead(de)) {
+ dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
+ MPASS(dc->tdc_current != NULL);
+ }
+ return (dc->tdc_current);
+
+out:
+ dc->tdc_tree = de;
+ dc->tdc_current = de;
+ if (de != NULL && tmpfs_dirent_dup(de))
+ dc->tdc_tree = tmpfs_dir_xlookup_hash(node,
+ de->td_hash);
+ return (dc->tdc_current);
+}
+
+/*
+ * Looks for a directory entry in the directory represented by node.
+ * 'cnp' describes the name of the entry to look for. Note that the .
+ * and .. components are not allowed as they do not physically exist
+ * within directories.
+ *
+ * Returns a pointer to the entry when found, otherwise NULL.
+ */
+struct tmpfs_dirent *
+tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
+ struct componentname *cnp)
+{
+ struct tmpfs_dir_duphead *duphead;
+ struct tmpfs_dirent *de;
+ uint32_t hash;
+
+ MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
+ MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
+ cnp->cn_nameptr[1] == '.')));
+ TMPFS_VALIDATE_DIR(node);
+
+ hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen);
+ de = tmpfs_dir_xlookup_hash(node, hash);
+ if (de != NULL && tmpfs_dirent_duphead(de)) {
+ duphead = &de->ud.td_duphead;
+ LIST_FOREACH(de, duphead, uh.td_dup.entries) {
+ if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
+ cnp->cn_namelen))
+ break;
+ }
+ } else if (de != NULL) {
+ if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
+ cnp->cn_namelen))
+ de = NULL;
+ }
+ if (de != NULL && f != NULL && de->td_node != f)
+ de = NULL;
+
+ return (de);
+}
+
+/*
+ * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex
+ * list, allocate new cookie value.
+ */
+static void
+tmpfs_dir_attach_dup(struct tmpfs_node *dnode,
+ struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde)
+{
+ struct tmpfs_dir_duphead *dupindex;
+ struct tmpfs_dirent *de, *pde;
+
+ dupindex = &dnode->tn_dir.tn_dupindex;
+ de = LIST_FIRST(dupindex);
+ if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) {
+ if (de == NULL)
+ nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
+ else
+ nde->td_cookie = de->td_cookie + 1;
+ MPASS(tmpfs_dirent_dup(nde));
+ LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries);
+ LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
+ return;
+ }
+
+ /*
+ * Cookie numbers are near exhaustion. Scan dupindex list for unused
+ * numbers. dupindex list is sorted in descending order. Keep it so
+ * after inserting nde.
+ */
+ while (1) {
+ pde = de;
+ de = LIST_NEXT(de, uh.td_dup.index_entries);
+ if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) {
+ /*
+ * Last element of the index doesn't have minimal cookie
+ * value, use it.
+ */
+ nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
+ LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries);
+ LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
+ return;
+ } else if (de == NULL) {
+ /*
+ * We are so lucky have 2^30 hash duplicates in single
+ * directory :) Return largest possible cookie value.
+ * It should be fine except possible issues with
+ * VOP_READDIR restart.
+ */
+ nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX;
+ LIST_INSERT_HEAD(dupindex, nde,
+ uh.td_dup.index_entries);
+ LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
+ return;
+ }
+ if (de->td_cookie + 1 == pde->td_cookie ||
+ de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX)
+ continue; /* No hole or invalid cookie. */
+ nde->td_cookie = de->td_cookie + 1;
+ MPASS(tmpfs_dirent_dup(nde));
+ MPASS(pde->td_cookie > nde->td_cookie);
+ MPASS(nde->td_cookie > de->td_cookie);
+ LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries);
+ LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
+ return;
+ };
+}
+
/*
* Attaches the directory entry de to the directory represented by vp.
* Note that this does not change the link count of the node pointed by
@@ -614,10 +874,38 @@ void
tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
{
struct tmpfs_node *dnode;
+ struct tmpfs_dirent *xde, *nde;
ASSERT_VOP_ELOCKED(vp, __func__);
+ MPASS(de->td_namelen > 0);
+ MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN);
+ MPASS(de->td_cookie == de->td_hash);
+
dnode = VP_TO_TMPFS_DIR(vp);
- TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries);
+ dnode->tn_dir.tn_readdir_lastn = 0;
+ dnode->tn_dir.tn_readdir_lastp = NULL;
+
+ MPASS(!tmpfs_dirent_dup(de));
+ xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
+ if (xde != NULL && tmpfs_dirent_duphead(xde))
+ tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
+ else if (xde != NULL) {
+ /*
+ * Allocate new duphead. Swap xde with duphead to avoid
+ * adding/removing elements with the same hash.
+ */
+ MPASS(!tmpfs_dirent_dup(xde));
+ tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0,
+ &nde);
+ /* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */
+ memcpy(nde, xde, sizeof(*xde));
+ xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD;
+ LIST_INIT(&xde->ud.td_duphead);
+ xde->td_namelen = 0;
+ xde->td_node = NULL;
+ tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde);
+ tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
+ }
dnode->tn_size += sizeof(struct tmpfs_dirent);
dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
TMPFS_NODE_MODIFIED;
@@ -633,58 +921,61 @@ tmpfs_dir_attach(struct vnode *vp, struc
void
tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
{
+ struct tmpfs_mount *tmp;
+ struct tmpfs_dir *head;
struct tmpfs_node *dnode;
+ struct tmpfs_dirent *xde;
ASSERT_VOP_ELOCKED(vp, __func__);
- dnode = VP_TO_TMPFS_DIR(vp);
- if (dnode->tn_dir.tn_readdir_lastp == de) {
- dnode->tn_dir.tn_readdir_lastn = 0;
- dnode->tn_dir.tn_readdir_lastp = NULL;
- }
+ dnode = VP_TO_TMPFS_DIR(vp);
+ head = &dnode->tn_dir.tn_dirhead;
+ dnode->tn_dir.tn_readdir_lastn = 0;
+ dnode->tn_dir.tn_readdir_lastp = NULL;
+
+ if (tmpfs_dirent_dup(de)) {
+ /* Remove duphead if de was last entry. */
+ if (LIST_NEXT(de, uh.td_dup.entries) == NULL) {
+ xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash);
+ MPASS(tmpfs_dirent_duphead(xde));
+ } else
+ xde = NULL;
+ LIST_REMOVE(de, uh.td_dup.entries);
+ LIST_REMOVE(de, uh.td_dup.index_entries);
+ if (xde != NULL) {
+ if (LIST_EMPTY(&xde->ud.td_duphead)) {
+ RB_REMOVE(tmpfs_dir, head, xde);
+ tmp = VFS_TO_TMPFS(vp->v_mount);
+ MPASS(xde->td_node == NULL);
+ tmpfs_free_dirent(tmp, xde);
+ }
+ }
+ } else
+ RB_REMOVE(tmpfs_dir, head, de);
- TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries);
dnode->tn_size -= sizeof(struct tmpfs_dirent);
dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
TMPFS_NODE_MODIFIED;
}
-/* --------------------------------------------------------------------- */
-
-/*
- * Looks for a directory entry in the directory represented by node.
- * 'cnp' describes the name of the entry to look for. Note that the .
- * and .. components are not allowed as they do not physically exist
- * within directories.
- *
- * Returns a pointer to the entry when found, otherwise NULL.
- */
-struct tmpfs_dirent *
-tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
- struct componentname *cnp)
+void
+tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode)
{
- boolean_t found;
- struct tmpfs_dirent *de;
-
- MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
- MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
- cnp->cn_nameptr[1] == '.')));
- TMPFS_VALIDATE_DIR(node);
+ struct tmpfs_dirent *de, *dde, *nde;
- found = 0;
- TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) {
- if (f != NULL && de->td_node != f)
- continue;
- MPASS(cnp->cn_namelen < 0xffff);
- if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
- bcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
- found = 1;
- break;
+ RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) {
+ RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
+ /* Node may already be destroyed. */
+ de->td_node = NULL;
+ if (tmpfs_dirent_duphead(de)) {
+ while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) {
+ LIST_REMOVE(dde, uh.td_dup.entries);
+ dde->td_node = NULL;
+ tmpfs_free_dirent(tmp, dde);
+ }
}
+ tmpfs_free_dirent(tmp, de);
}
- node->tn_status |= TMPFS_NODE_ACCESSED;
-
- return found ? de : NULL;
}
/* --------------------------------------------------------------------- */
@@ -696,7 +987,7 @@ tmpfs_dir_lookup(struct tmpfs_node *node
* hold the directory entry or an appropriate error code if another
* error happens.
*/
-int
+static int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
int error;
@@ -713,12 +1004,9 @@ tmpfs_dir_getdotdent(struct tmpfs_node *
dent.d_reclen = GENERIC_DIRSIZ(&dent);
if (dent.d_reclen > uio->uio_resid)
- error = -1;
- else {
+ error = EJUSTRETURN;
+ else
error = uiomove(&dent, dent.d_reclen, uio);
- if (error == 0)
- uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
- }
node->tn_status |= TMPFS_NODE_ACCESSED;
@@ -734,7 +1022,7 @@ tmpfs_dir_getdotdent(struct tmpfs_node *
* hold the directory entry or an appropriate error code if another
* error happens.
*/
-int
+static int
tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
{
int error;
@@ -763,19 +1051,9 @@ tmpfs_dir_getdotdotdent(struct tmpfs_nod
dent.d_reclen = GENERIC_DIRSIZ(&dent);
if (dent.d_reclen > uio->uio_resid)
- error = -1;
- else {
+ error = EJUSTRETURN;
+ else
error = uiomove(&dent, dent.d_reclen, uio);
- if (error == 0) {
- struct tmpfs_dirent *de;
-
- de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
- if (de == NULL)
- uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
- else
- uio->uio_offset = tmpfs_dircookie(de);
- }
- }
node->tn_status |= TMPFS_NODE_ACCESSED;
@@ -785,30 +1063,6 @@ tmpfs_dir_getdotdotdent(struct tmpfs_nod
/* --------------------------------------------------------------------- */
/*
- * Lookup a directory entry by its associated cookie.
- */
-struct tmpfs_dirent *
-tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
-{
- struct tmpfs_dirent *de;
-
- if (cookie == node->tn_dir.tn_readdir_lastn &&
- node->tn_dir.tn_readdir_lastp != NULL) {
- return node->tn_dir.tn_readdir_lastp;
- }
-
- TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) {
- if (tmpfs_dircookie(de) == cookie) {
- break;
- }
- }
-
- return de;
-}
-
-/* --------------------------------------------------------------------- */
-
-/*
* Helper function for tmpfs_readdir. Returns as much directory entries
* as can fit in the uio space. The read starts at uio->uio_offset.
* The function returns 0 on success, -1 if there was not enough space
@@ -816,27 +1070,47 @@ tmpfs_dir_lookupbycookie(struct tmpfs_no
* error code if another error happens.
*/
int
-tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
+tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, int cnt,
+ u_long *cookies, int *ncookies)
{
- int error;
- off_t startcookie;
+ struct tmpfs_dir_cursor dc;
struct tmpfs_dirent *de;
+ off_t off;
+ int error;
TMPFS_VALIDATE_DIR(node);
- /* Locate the first directory entry we have to return. We have cached
- * the last readdir in the node, so use those values if appropriate.
- * Otherwise do a linear scan to find the requested entry. */
- startcookie = uio->uio_offset;
- MPASS(startcookie != TMPFS_DIRCOOKIE_DOT);
- MPASS(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
- if (startcookie == TMPFS_DIRCOOKIE_EOF) {
- return 0;
- } else {
- de = tmpfs_dir_lookupbycookie(node, startcookie);
- }
- if (de == NULL) {
- return EINVAL;
+ off = 0;
+ switch (uio->uio_offset) {
+ case TMPFS_DIRCOOKIE_DOT:
+ error = tmpfs_dir_getdotdent(node, uio);
+ if (error != 0)
+ return (error);
+ uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
+ if (cnt != 0)
+ cookies[(*ncookies)++] = off = uio->uio_offset;
+ case TMPFS_DIRCOOKIE_DOTDOT:
+ error = tmpfs_dir_getdotdotdent(node, uio);
+ if (error != 0)
+ return (error);
+ de = tmpfs_dir_first(node, &dc);
+ if (de == NULL)
+ uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
+ else
+ uio->uio_offset = tmpfs_dirent_cookie(de);
+ if (cnt != 0)
+ cookies[(*ncookies)++] = off = uio->uio_offset;
+ if (de == NULL)
+ return (0);
+ break;
+ case TMPFS_DIRCOOKIE_EOF:
+ return (0);
+ default:
+ de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc);
+ if (de == NULL)
+ return (EINVAL);
+ if (cnt != 0)
+ off = tmpfs_dirent_cookie(de);
}
/* Read as much entries as possible; i.e., until we reach the end of
@@ -887,14 +1161,14 @@ tmpfs_dir_getdents(struct tmpfs_node *no
}
d.d_namlen = de->td_namelen;
MPASS(de->td_namelen < sizeof(d.d_name));
- (void)memcpy(d.d_name, de->td_name, de->td_namelen);
+ (void)memcpy(d.d_name, de->ud.td_name, de->td_namelen);
d.d_name[de->td_namelen] = '\0';
d.d_reclen = GENERIC_DIRSIZ(&d);
/* Stop reading if the directory entry we are treating is
* bigger than the amount of data that can be returned. */
if (d.d_reclen > uio->uio_resid) {
- error = -1;
+ error = EJUSTRETURN;
break;
}
@@ -902,21 +1176,30 @@ tmpfs_dir_getdents(struct tmpfs_node *no
* advance pointers. */
error = uiomove(&d, d.d_reclen, uio);
if (error == 0) {
- (*cntp)++;
- de = TAILQ_NEXT(de, td_entries);
+ de = tmpfs_dir_next(node, &dc);
+ if (cnt != 0) {
+ if (de == NULL)
+ off = TMPFS_DIRCOOKIE_EOF;
+ else
+ off = tmpfs_dirent_cookie(de);
+ MPASS(*ncookies < cnt);
+ cookies[(*ncookies)++] = off;
+ }
}
} while (error == 0 && uio->uio_resid > 0 && de != NULL);
/* Update the offset and cache. */
- if (de == NULL) {
- uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
- node->tn_dir.tn_readdir_lastn = 0;
- node->tn_dir.tn_readdir_lastp = NULL;
- } else {
- node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de);
- node->tn_dir.tn_readdir_lastp = de;
+ if (cnt == 0) {
+ if (de == NULL)
+ off = TMPFS_DIRCOOKIE_EOF;
+ else
+ off = tmpfs_dirent_cookie(de);
}
+ uio->uio_offset = off;
+ node->tn_dir.tn_readdir_lastn = off;
+ node->tn_dir.tn_readdir_lastp = de;
+
node->tn_status |= TMPFS_NODE_ACCESSED;
return error;
}
@@ -943,7 +1226,7 @@ tmpfs_dir_whiteout_remove(struct vnode *
de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
MPASS(de != NULL && de->td_node == NULL);
tmpfs_dir_detach(dvp, de);
- tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de, TRUE);
+ tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
}
/* --------------------------------------------------------------------- */
@@ -1436,3 +1719,15 @@ out:
return error;
}
+
+static __inline int
+tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
+{
+ if (a->td_hash > b->td_hash)
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-head
mailing list