svn commit: r220986 - stable/8/sys/ufs/ufs
Konstantin Belousov
kib at FreeBSD.org
Sun Apr 24 11:01:42 UTC 2011
Author: kib
Date: Sun Apr 24 11:01:42 2011
New Revision: 220986
URL: http://svn.freebsd.org/changeset/base/220986
Log:
Merge the part of r207141 that fixes the locking for ufs_rename() (and
r218838 followup).
Adopt the SU calls to the stable/8 SU implementation, with the help from
Kirk.
PR: kern/156545
Reviewed by: mckusick
Tested by: pho
Modified:
stable/8/sys/ufs/ufs/inode.h
stable/8/sys/ufs/ufs/ufs_extern.h
stable/8/sys/ufs/ufs/ufs_lookup.c
stable/8/sys/ufs/ufs/ufs_vnops.c
Directory Properties:
stable/8/sys/ (props changed)
stable/8/sys/amd64/include/xen/ (props changed)
stable/8/sys/cddl/contrib/opensolaris/ (props changed)
stable/8/sys/contrib/dev/acpica/ (props changed)
stable/8/sys/contrib/pf/ (props changed)
Modified: stable/8/sys/ufs/ufs/inode.h
==============================================================================
--- stable/8/sys/ufs/ufs/inode.h Sun Apr 24 10:47:56 2011 (r220985)
+++ stable/8/sys/ufs/ufs/inode.h Sun Apr 24 11:01:42 2011 (r220986)
@@ -120,7 +120,7 @@ struct inode {
#define IN_CHANGE 0x0002 /* Inode change time update request. */
#define IN_UPDATE 0x0004 /* Modification time update request. */
#define IN_MODIFIED 0x0008 /* Inode has been modified. */
-#define IN_RENAME 0x0010 /* Inode is being renamed. */
+#define IN_NEEDSYNC 0x0010 /* Inode requires fsync. */
#define IN_LAZYMOD 0x0040 /* Modified, but don't write yet. */
#define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */
#define IN_LAZYACCESS 0x0100 /* Process IN_ACCESS after the
Modified: stable/8/sys/ufs/ufs/ufs_extern.h
==============================================================================
--- stable/8/sys/ufs/ufs/ufs_extern.h Sun Apr 24 10:47:56 2011 (r220985)
+++ stable/8/sys/ufs/ufs/ufs_extern.h Sun Apr 24 11:01:42 2011 (r220986)
@@ -57,7 +57,7 @@ int ufs_bmap(struct vop_bmap_args *);
int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *,
struct buf *, int *, int *);
int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **);
-int ufs_checkpath(ino_t, struct inode *, struct ucred *);
+int ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *);
void ufs_dirbad(struct inode *, doff_t, char *);
int ufs_dirbadentry(struct vnode *, struct direct *, int);
int ufs_dirempty(struct inode *, ino_t, struct ucred *);
@@ -66,9 +66,11 @@ int ufs_extwrite(struct vop_write_args
void ufs_makedirentry(struct inode *, struct componentname *,
struct direct *);
int ufs_direnter(struct vnode *, struct vnode *, struct direct *,
- struct componentname *, struct buf *);
+ struct componentname *, struct buf *, int);
int ufs_dirremove(struct vnode *, struct inode *, int, int);
int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int);
+int ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *,
+ ino_t *);
int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *);
int ufs_inactive(struct vop_inactive_args *);
int ufs_init(struct vfsconf *);
Modified: stable/8/sys/ufs/ufs/ufs_lookup.c
==============================================================================
--- stable/8/sys/ufs/ufs/ufs_lookup.c Sun Apr 24 10:47:56 2011 (r220985)
+++ stable/8/sys/ufs/ufs/ufs_lookup.c Sun Apr 24 11:01:42 2011 (r220986)
@@ -76,9 +76,6 @@ SYSCTL_INT(_debug, OID_AUTO, dircheck, C
/* true if old FS format...*/
#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0)
-static int ufs_lookup_(struct vnode *, struct vnode **, struct componentname *,
- ino_t *);
-
#ifdef QUOTA
static int
ufs_lookup_upgrade_lock(struct vnode *vp)
@@ -214,11 +211,11 @@ ufs_lookup(ap)
} */ *ap;
{
- return (ufs_lookup_(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
+ return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
}
-static int
-ufs_lookup_(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
+int
+ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
ino_t *dd_ino)
{
struct inode *dp; /* inode for directory being searched */
@@ -556,6 +553,8 @@ notfound:
return (ENOENT);
found:
+ if (dd_ino != NULL)
+ *dd_ino = ino;
if (numdirpasses == 2)
nchstats.ncs_pass2++;
/*
@@ -578,11 +577,6 @@ found:
if ((flags & ISLASTCN) && nameiop == LOOKUP)
dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1);
- if (dd_ino != NULL) {
- *dd_ino = ino;
- return (0);
- }
-
/*
* If deleting, and at end of pathname, return
* parameters which can be used to remove file.
@@ -590,17 +584,6 @@ found:
if (nameiop == DELETE && (flags & ISLASTCN)) {
if (flags & LOCKPARENT)
ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
- if ((error = VFS_VGET(vdp->v_mount, ino,
- LK_EXCLUSIVE, &tdp)) != 0)
- return (error);
-
- error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread);
- if (error) {
- vput(tdp);
- return (error);
- }
-
-
/*
* Return pointer to current entry in dp->i_offset,
* and distance past previous entry (if there
@@ -617,6 +600,16 @@ found:
dp->i_count = 0;
else
dp->i_count = dp->i_offset - prevoff;
+ if (dd_ino != NULL)
+ return (0);
+ if ((error = VFS_VGET(vdp->v_mount, ino,
+ LK_EXCLUSIVE, &tdp)) != 0)
+ return (error);
+ error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread);
+ if (error) {
+ vput(tdp);
+ return (error);
+ }
if (dp->i_number == ino) {
VREF(vdp);
*vpp = vdp;
@@ -648,6 +641,8 @@ found:
dp->i_offset = i_offset;
if (dp->i_number == ino)
return (EISDIR);
+ if (dd_ino != NULL)
+ return (0);
if ((error = VFS_VGET(vdp->v_mount, ino,
LK_EXCLUSIVE, &tdp)) != 0)
return (error);
@@ -682,6 +677,8 @@ found:
cnp->cn_flags |= SAVENAME;
return (0);
}
+ if (dd_ino != NULL)
+ return (0);
/*
* Step through the translation in the name. We do not `vput' the
@@ -713,7 +710,7 @@ found:
* to the inode we looked up before vdp lock was
* dropped.
*/
- error = ufs_lookup_(pdp, NULL, cnp, &ino1);
+ error = ufs_lookup_ino(pdp, NULL, cnp, &ino1);
if (error) {
vput(tdp);
return (error);
@@ -865,12 +862,13 @@ ufs_makedirentry(ip, cnp, newdirp)
* soft dependency code).
*/
int
-ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
+ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename)
struct vnode *dvp;
struct vnode *tvp;
struct direct *dirp;
struct componentname *cnp;
struct buf *newdirbp;
+ int isrename;
{
struct ucred *cr;
struct thread *td;
@@ -943,22 +941,30 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
blkoff += DIRBLKSIZ;
}
if (softdep_setup_directory_add(bp, dp, dp->i_offset,
- dirp->d_ino, newdirbp, 1) == 0) {
- bdwrite(bp);
+ dirp->d_ino, newdirbp, 1))
+ dp->i_flag |= IN_NEEDSYNC;
+#ifdef JOURNALED_SOFTUPDATES
+ if (newdirbp)
+ bdwrite(newdirbp);
+#endif
+ bdwrite(bp);
+ if ((dp->i_flag & IN_NEEDSYNC) == 0)
return (UFS_UPDATE(dvp, 0));
- }
- /* We have just allocated a directory block in an
- * indirect block. Rather than tracking when it gets
- * claimed by the inode, we simply do a VOP_FSYNC
- * now to ensure that it is there (in case the user
- * does a future fsync). Note that we have to unlock
- * the inode for the entry that we just entered, as
- * the VOP_FSYNC may need to lock other inodes which
- * can lead to deadlock if we also hold a lock on
- * the newly entered node.
+ /*
+ * We have just allocated a directory block in an
+ * indirect block. We must prevent holes in the
+ * directory created if directory entries are
+ * written out of order. To accomplish this we
+ * fsync when we extend a directory into indirects.
+ * During rename it's not safe to drop the tvp lock
+ * so sync must be delayed until it is.
+ *
+ * This synchronous step could be removed if fsck and
+ * the kernel were taught to fill in sparse
+ * directories rather than panic.
*/
- if ((error = bwrite(bp)))
- return (error);
+ if (isrename)
+ return (0);
if (tvp != NULL)
VOP_UNLOCK(tvp, 0);
error = VOP_FSYNC(dvp, MNT_WAIT, td);
@@ -1099,6 +1105,10 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
(void) softdep_setup_directory_add(bp, dp,
dp->i_offset + (caddr_t)ep - dirbuf,
dirp->d_ino, newdirbp, 0);
+#ifdef JOURNALED_SOFTUPDATES
+ if (newdirbp != NULL)
+ bdwrite(newdirbp);
+#endif
bdwrite(bp);
} else {
if (DOINGASYNC(dvp)) {
@@ -1116,7 +1126,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
* lock other inodes which can lead to deadlock if we also hold a
* lock on the newly entered node.
*/
- if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) {
+ if (isrename == 0 && error == 0 &&
+ dp->i_endoff && dp->i_endoff < dp->i_size) {
if (tvp != NULL)
VOP_UNLOCK(tvp, 0);
#ifdef UFS_DIRHASH
@@ -1386,25 +1397,25 @@ ufs_dir_dd_ino(struct vnode *vp, struct
/*
* Check if source directory is in the path of the target directory.
- * Target is supplied locked, source is unlocked.
- * The target is always vput before returning.
*/
int
-ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred)
+ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct ucred *cred, ino_t *wait_ino)
{
- struct vnode *vp, *vp1;
+ struct mount *mp;
+ struct vnode *tvp, *vp, *vp1;
int error;
ino_t dd_ino;
- vp = ITOV(target);
- if (target->i_number == source_ino) {
- error = EEXIST;
- goto out;
- }
- error = 0;
+ vp = tvp = ITOV(target);
+ mp = vp->v_mount;
+ *wait_ino = 0;
+ if (target->i_number == source_ino)
+ return (EEXIST);
+ if (target->i_number == parent_ino)
+ return (0);
if (target->i_number == ROOTINO)
- goto out;
-
+ return (0);
+ error = 0;
for (;;) {
error = ufs_dir_dd_ino(vp, cred, &dd_ino);
if (error != 0)
@@ -1415,9 +1426,13 @@ ufs_checkpath(ino_t source_ino, struct i
}
if (dd_ino == ROOTINO)
break;
- error = vn_vget_ino(vp, dd_ino, LK_EXCLUSIVE, &vp1);
- if (error != 0)
+ if (dd_ino == parent_ino)
+ break;
+ error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, &vp1);
+ if (error != 0) {
+ *wait_ino = dd_ino;
break;
+ }
/* Recheck that ".." still points to vp1 after relock of vp */
error = ufs_dir_dd_ino(vp, cred, &dd_ino);
if (error != 0) {
@@ -1429,14 +1444,14 @@ ufs_checkpath(ino_t source_ino, struct i
vput(vp1);
continue;
}
- vput(vp);
+ if (vp != tvp)
+ vput(vp);
vp = vp1;
}
-out:
if (error == ENOTDIR)
- printf("checkpath: .. not a directory\n");
- if (vp != NULL)
+ panic("checkpath: .. not a directory\n");
+ if (vp != tvp)
vput(vp);
return (error);
}
Modified: stable/8/sys/ufs/ufs/ufs_vnops.c
==============================================================================
--- stable/8/sys/ufs/ufs/ufs_vnops.c Sun Apr 24 10:47:56 2011 (r220985)
+++ stable/8/sys/ufs/ufs/ufs_vnops.c Sun Apr 24 11:01:42 2011 (r220986)
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
+#include <sys/sysctl.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/mount.h>
@@ -114,6 +115,8 @@ static vop_close_t ufsfifo_close;
static vop_kqfilter_t ufsfifo_kqfilter;
static vop_pathconf_t ufsfifo_pathconf;
+SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
+
/*
* A virgin directory (no blushing please).
*/
@@ -992,7 +995,7 @@ ufs_link(ap)
error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
if (!error) {
ufs_makedirentry(ip, cnp, &newdir);
- error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
+ error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
}
if (error) {
@@ -1043,7 +1046,7 @@ ufs_whiteout(ap)
newdir.d_namlen = cnp->cn_namelen;
bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
newdir.d_type = DT_WHT;
- error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
+ error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
break;
case DELETE:
@@ -1062,6 +1065,11 @@ ufs_whiteout(ap)
return (error);
}
+static volatile int rename_restarts;
+SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
+ __DEVOLATILE(int *, &rename_restarts), 0,
+ "Times rename had to restart due to lock contention");
+
/*
* Rename system call.
* rename("foo", "bar");
@@ -1101,111 +1109,185 @@ ufs_rename(ap)
struct vnode *tdvp = ap->a_tdvp;
struct vnode *fvp = ap->a_fvp;
struct vnode *fdvp = ap->a_fdvp;
+ struct vnode *nvp;
struct componentname *tcnp = ap->a_tcnp;
struct componentname *fcnp = ap->a_fcnp;
struct thread *td = fcnp->cn_thread;
- struct inode *ip, *xp, *dp;
+ struct inode *fip, *tip, *tdp, *fdp;
struct direct newdir;
- int doingdirectory = 0, oldparent = 0, newparent = 0;
+ off_t endoff;
+ int doingdirectory, newparent;
int error = 0, ioflag;
- ino_t fvp_ino;
+ struct mount *mp;
+ ino_t ino;
#ifdef INVARIANTS
if ((tcnp->cn_flags & HASBUF) == 0 ||
(fcnp->cn_flags & HASBUF) == 0)
panic("ufs_rename: no name");
#endif
+ endoff = 0;
+ mp = tdvp->v_mount;
+ VOP_UNLOCK(tdvp, 0);
+ if (tvp && tvp != tdvp)
+ VOP_UNLOCK(tvp, 0);
/*
* Check for cross-device rename.
*/
if ((fvp->v_mount != tdvp->v_mount) ||
(tvp && (fvp->v_mount != tvp->v_mount))) {
error = EXDEV;
-abortit:
- if (tdvp == tvp)
- vrele(tdvp);
- else
- vput(tdvp);
- if (tvp)
- vput(tvp);
- vrele(fdvp);
+ mp = NULL;
+ goto releout;
+ }
+ error = vfs_busy(mp, 0);
+ if (error) {
+ mp = NULL;
+ goto releout;
+ }
+relock:
+ /*
+ * We need to acquire 2 to 4 locks depending on whether tvp is NULL
+ * and fdvp and tdvp are the same directory. Subsequently we need
+ * to double-check all paths and in the directory rename case we
+ * need to verify that we are not creating a directory loop. To
+ * handle this we acquire all but fdvp using non-blocking
+ * acquisitions. If we fail to acquire any lock in the path we will
+ * drop all held locks, acquire the new lock in a blocking fashion,
+ * and then release it and restart the rename. This acquire/release
+ * step ensures that we do not spin on a lock waiting for release.
+ */
+ error = vn_lock(fdvp, LK_EXCLUSIVE);
+ if (error)
+ goto releout;
+ if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
+ VOP_UNLOCK(fdvp, 0);
+ error = vn_lock(tdvp, LK_EXCLUSIVE);
+ if (error)
+ goto releout;
+ VOP_UNLOCK(tdvp, 0);
+ atomic_add_int(&rename_restarts, 1);
+ goto relock;
+ }
+ /*
+ * Re-resolve fvp to be certain it still exists and fetch the
+ * correct vnode.
+ */
+ error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
+ if (error) {
+ VOP_UNLOCK(fdvp, 0);
+ VOP_UNLOCK(tdvp, 0);
+ goto releout;
+ }
+ error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
+ if (error) {
+ VOP_UNLOCK(fdvp, 0);
+ VOP_UNLOCK(tdvp, 0);
+ if (error != EBUSY)
+ goto releout;
+ error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
+ if (error != 0)
+ goto releout;
+ VOP_UNLOCK(nvp, 0);
vrele(fvp);
- return (error);
+ fvp = nvp;
+ atomic_add_int(&rename_restarts, 1);
+ goto relock;
+ }
+ vrele(fvp);
+ fvp = nvp;
+ /*
+ * Re-resolve tvp and acquire the vnode lock if present.
+ */
+ error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
+ if (error != 0 && error != EJUSTRETURN) {
+ VOP_UNLOCK(fdvp, 0);
+ VOP_UNLOCK(tdvp, 0);
+ VOP_UNLOCK(fvp, 0);
+ goto releout;
}
-
+ /*
+ * If tvp disappeared we just carry on.
+ */
+ if (error == EJUSTRETURN && tvp != NULL) {
+ vrele(tvp);
+ tvp = NULL;
+ }
+ /*
+ * Get the tvp ino if the lookup succeeded. We may have to restart
+ * if the non-blocking acquire fails.
+ */
+ if (error == 0) {
+ nvp = NULL;
+ error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
+ if (tvp)
+ vrele(tvp);
+ tvp = nvp;
+ if (error) {
+ VOP_UNLOCK(fdvp, 0);
+ VOP_UNLOCK(tdvp, 0);
+ VOP_UNLOCK(fvp, 0);
+ if (error != EBUSY)
+ goto releout;
+ error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
+ if (error != 0)
+ goto releout;
+ VOP_UNLOCK(nvp, 0);
+ atomic_add_int(&rename_restarts, 1);
+ goto relock;
+ }
+ }
+ fdp = VTOI(fdvp);
+ fip = VTOI(fvp);
+ tdp = VTOI(tdvp);
+ tip = NULL;
+ if (tvp)
+ tip = VTOI(tvp);
if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
(VTOI(tdvp)->i_flags & APPEND))) {
error = EPERM;
- goto abortit;
+ goto unlockout;
}
-
/*
* Renaming a file to itself has no effect. The upper layers should
- * not call us in that case. Temporarily just warn if they do.
+ * not call us in that case. However, things could change after
+ * we drop the locks above.
*/
if (fvp == tvp) {
- printf("ufs_rename: fvp == tvp (can't happen)\n");
error = 0;
- goto abortit;
+ goto unlockout;
}
-
- if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
- goto abortit;
- dp = VTOI(fdvp);
- ip = VTOI(fvp);
- if (ip->i_nlink >= LINK_MAX) {
- VOP_UNLOCK(fvp, 0);
+ doingdirectory = 0;
+ newparent = 0;
+ ino = fip->i_number;
+ if (fip->i_nlink >= LINK_MAX) {
error = EMLINK;
- goto abortit;
+ goto unlockout;
}
- if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
- || (dp->i_flags & APPEND)) {
- VOP_UNLOCK(fvp, 0);
+ if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
+ || (fdp->i_flags & APPEND)) {
error = EPERM;
- goto abortit;
+ goto unlockout;
}
- if ((ip->i_mode & IFMT) == IFDIR) {
+ if ((fip->i_mode & IFMT) == IFDIR) {
/*
* Avoid ".", "..", and aliases of "." for obvious reasons.
*/
if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
- dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
- (ip->i_flag & IN_RENAME)) {
- VOP_UNLOCK(fvp, 0);
+ fdp == fip ||
+ (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
error = EINVAL;
- goto abortit;
+ goto unlockout;
}
- ip->i_flag |= IN_RENAME;
- oldparent = dp->i_number;
+ if (fdp->i_number != tdp->i_number)
+ newparent = tdp->i_number;
doingdirectory = 1;
}
- vrele(fdvp);
-
- /*
- * When the target exists, both the directory
- * and target vnodes are returned locked.
- */
- dp = VTOI(tdvp);
- xp = NULL;
- if (tvp)
- xp = VTOI(tvp);
-
- /*
- * 1) Bump link count while we're moving stuff
- * around. If we crash somewhere before
- * completing our work, the link count
- * may be wrong, but correctable.
- */
- ip->i_effnlink++;
- ip->i_nlink++;
- DIP_SET(ip, i_nlink, ip->i_nlink);
- ip->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(fvp))
- softdep_change_linkcnt(ip);
- if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
- DOINGASYNC(fvp)))) != 0) {
- VOP_UNLOCK(fvp, 0);
- goto bad;
+ if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) ||
+ (tvp != NULL && tvp->v_type == VDIR &&
+ tvp->v_mountedhere != NULL)) {
+ error = EXDEV;
+ goto unlockout;
}
/*
@@ -1214,35 +1296,55 @@ abortit:
* directory hierarchy above the target, as this would
* orphan everything below the source directory. Also
* the user must have write permission in the source so
- * as to be able to change "..". We must repeat the call
- * to namei, as the parent directory is unlocked by the
- * call to checkpath().
- */
- error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
- fvp_ino = ip->i_number;
- VOP_UNLOCK(fvp, 0);
- if (oldparent != dp->i_number)
- newparent = dp->i_number;
+ * as to be able to change "..".
+ */
if (doingdirectory && newparent) {
- if (error) /* write access check above */
- goto bad;
- if (xp != NULL)
- vput(tvp);
- error = ufs_checkpath(fvp_ino, dp, tcnp->cn_cred);
+ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
if (error)
- goto out;
+ goto unlockout;
+ error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
+ &ino);
+ /*
+ * We encountered a lock that we have to wait for. Unlock
+ * everything else and VGET before restarting.
+ */
+ if (ino) {
+ VOP_UNLOCK(fdvp, 0);
+ VOP_UNLOCK(fvp, 0);
+ VOP_UNLOCK(tdvp, 0);
+ if (tvp)
+ VOP_UNLOCK(tvp, 0);
+ error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
+ if (error == 0)
+ vput(nvp);
+ atomic_add_int(&rename_restarts, 1);
+ goto relock;
+ }
+ if (error)
+ goto unlockout;
if ((tcnp->cn_flags & SAVESTART) == 0)
panic("ufs_rename: lost to startdir");
- VREF(tdvp);
- error = relookup(tdvp, &tvp, tcnp);
- if (error)
- goto out;
- vrele(tdvp);
- dp = VTOI(tdvp);
- xp = NULL;
- if (tvp)
- xp = VTOI(tvp);
}
+ if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
+ tdp->i_effnlink == 0)
+ panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
+
+ /*
+ * 1) Bump link count while we're moving stuff
+ * around. If we crash somewhere before
+ * completing our work, the link count
+ * may be wrong, but correctable.
+ */
+ fip->i_effnlink++;
+ fip->i_nlink++;
+ DIP_SET(fip, i_nlink, fip->i_nlink);
+ fip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(fvp))
+ softdep_change_linkcnt(fip);
+ error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)));
+ if (error)
+ goto bad;
+
/*
* 2) If target doesn't exist, link the target
* to the source and unlink the source.
@@ -1250,52 +1352,37 @@ abortit:
* entry to reference the source inode and
* expunge the original entry's existence.
*/
- if (xp == NULL) {
- if (dp->i_dev != ip->i_dev)
+ if (tip == NULL) {
+ if (tdp->i_dev != fip->i_dev)
panic("ufs_rename: EXDEV");
- /*
- * Account for ".." in new directory.
- * When source and destination have the same
- * parent we don't fool with the link count.
- */
if (doingdirectory && newparent) {
- if ((nlink_t)dp->i_nlink >= LINK_MAX) {
+ /*
+ * Account for ".." in new directory.
+ * When source and destination have the same
+ * parent we don't adjust the link count. The
+ * actual link modification is completed when
+ * .. is rewritten below.
+ */
+ if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
error = EMLINK;
goto bad;
}
- dp->i_effnlink++;
- dp->i_nlink++;
- DIP_SET(dp, i_nlink, dp->i_nlink);
- dp->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(dp);
- error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
- DOINGASYNC(tdvp)));
- if (error)
- goto bad;
}
- ufs_makedirentry(ip, tcnp, &newdir);
- error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
- if (error) {
- if (doingdirectory && newparent) {
- dp->i_effnlink--;
- dp->i_nlink--;
- DIP_SET(dp, i_nlink, dp->i_nlink);
- dp->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(dp);
- (void)UFS_UPDATE(tdvp, 1);
- }
+ ufs_makedirentry(fip, tcnp, &newdir);
+ error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
+ if (error)
goto bad;
- }
- vput(tdvp);
+ /* Setup tdvp for directory compaction if needed. */
+ if (tdp->i_count && tdp->i_endoff &&
+ tdp->i_endoff < tdp->i_size)
+ endoff = tdp->i_endoff;
} else {
- if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
+ if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev)
panic("ufs_rename: EXDEV");
/*
* Short circuit rename(foo, foo).
*/
- if (xp->i_number == ip->i_number)
+ if (tip->i_number == fip->i_number)
panic("ufs_rename: same file");
/*
* If the parent directory is "sticky", then the caller
@@ -1303,7 +1390,7 @@ abortit:
* destination of the rename. This implements append-only
* directories.
*/
- if ((dp->i_mode & S_ISTXT) &&
+ if ((tdp->i_mode & S_ISTXT) &&
VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
error = EPERM;
@@ -1314,9 +1401,9 @@ abortit:
* to it. Also, ensure source and target are compatible
* (both directories, or both not directories).
*/
- if ((xp->i_mode&IFMT) == IFDIR) {
- if ((xp->i_effnlink > 2) ||
- !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
+ if ((tip->i_mode & IFMT) == IFDIR) {
+ if ((tip->i_effnlink > 2) ||
+ !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
error = ENOTEMPTY;
goto bad;
}
@@ -1329,20 +1416,30 @@ abortit:
error = EISDIR;
goto bad;
}
- error = ufs_dirrewrite(dp, xp, ip->i_number,
- IFTODT(ip->i_mode),
- (doingdirectory && newparent) ? newparent : doingdirectory);
- if (error)
- goto bad;
if (doingdirectory) {
if (!newparent) {
- dp->i_effnlink--;
+ tdp->i_effnlink--;
if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(dp);
+ softdep_change_linkcnt(tdp);
}
- xp->i_effnlink--;
+ tip->i_effnlink--;
if (DOINGSOFTDEP(tvp))
- softdep_change_linkcnt(xp);
+ softdep_change_linkcnt(tip);
+ }
+ error = ufs_dirrewrite(tdp, tip, fip->i_number,
+ IFTODT(fip->i_mode),
+ (doingdirectory && newparent) ? newparent : doingdirectory);
+ if (error) {
+ if (doingdirectory) {
+ if (!newparent) {
+ tdp->i_effnlink++;
+ if (DOINGSOFTDEP(tdvp))
+ softdep_change_linkcnt(tdp);
+ }
+ tip->i_effnlink++;
+ if (DOINGSOFTDEP(tvp))
+ softdep_change_linkcnt(tip);
+ }
}
if (doingdirectory && !DOINGSOFTDEP(tvp)) {
/*
@@ -1357,115 +1454,107 @@ abortit:
* them now.
*/
if (!newparent) {
- dp->i_nlink--;
- DIP_SET(dp, i_nlink, dp->i_nlink);
- dp->i_flag |= IN_CHANGE;
+ tdp->i_nlink--;
+ DIP_SET(tdp, i_nlink, tdp->i_nlink);
+ tdp->i_flag |= IN_CHANGE;
}
- xp->i_nlink--;
- DIP_SET(xp, i_nlink, xp->i_nlink);
- xp->i_flag |= IN_CHANGE;
+ tip->i_nlink--;
+ DIP_SET(tip, i_nlink, tip->i_nlink);
+ tip->i_flag |= IN_CHANGE;
ioflag = IO_NORMAL;
if (!DOINGASYNC(tvp))
ioflag |= IO_SYNC;
+ /* Don't go to bad here as the new link exists. */
if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
tcnp->cn_cred, tcnp->cn_thread)) != 0)
- goto bad;
+ goto unlockout;
}
- vput(tdvp);
- vput(tvp);
- xp = NULL;
}
/*
- * 3) Unlink the source.
+ * 3) Unlink the source. We have to resolve the path again to
+ * fixup the directory offset and count for ufs_dirremove.
*/
- fcnp->cn_flags &= ~MODMASK;
- fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
- if ((fcnp->cn_flags & SAVESTART) == 0)
- panic("ufs_rename: lost from startdir");
- VREF(fdvp);
- error = relookup(fdvp, &fvp, fcnp);
- if (error == 0)
- vrele(fdvp);
- if (fvp != NULL) {
- xp = VTOI(fvp);
- dp = VTOI(fdvp);
- } else {
- /*
- * From name has disappeared. IN_RENAME is not sufficient
- * to protect against directory races due to timing windows,
- * so we have to remove the panic. XXX the only real way
- * to solve this issue is at a much higher level. By the
- * time we hit ufs_rename() it's too late.
- */
-#if 0
- if (doingdirectory)
- panic("ufs_rename: lost dir entry");
-#endif
- vrele(ap->a_fvp);
- return (0);
+ if (fdvp == tdvp) {
+ error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
+ if (error)
+ panic("ufs_rename: from entry went away!");
+ if (ino != fip->i_number)
+ panic("ufs_rename: ino mismatch %d != %d\n", ino,
+ fip->i_number);
}
/*
- * Ensure that the directory entry still exists and has not
- * changed while the new name has been entered. If the source is
- * a file then the entry may have been unlinked or renamed. In
- * either case there is no further work to be done. If the source
- * is a directory then it cannot have been rmdir'ed; the IN_RENAME
- * flag ensures that it cannot be moved by another rename or removed
- * by a rmdir.
- */
- if (xp != ip) {
- /*
- * From name resolves to a different inode. IN_RENAME is
- * not sufficient protection against timing window races
- * so we can't panic here. XXX the only real way
- * to solve this issue is at a much higher level. By the
- * time we hit ufs_rename() it's too late.
- */
-#if 0
- if (doingdirectory)
- panic("ufs_rename: lost dir entry");
-#endif
- } else {
+ * If the source is a directory with a
+ * new parent, the link count of the old
+ * parent directory must be decremented
+ * and ".." set to point to the new parent.
+ */
+ if (doingdirectory && newparent) {
/*
- * If the source is a directory with a
- * new parent, the link count of the old
- * parent directory must be decremented
- * and ".." set to point to the new parent.
+ * If tip exists we simply use its link, otherwise we must
+ * add a new one.
*/
- if (doingdirectory && newparent) {
- xp->i_offset = mastertemplate.dot_reclen;
- ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
- cache_purge(fdvp);
- }
- error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
- xp->i_flag &= ~IN_RENAME;
- }
- if (dp)
- vput(fdvp);
- if (xp)
- vput(fvp);
- vrele(ap->a_fvp);
+ if (tip == NULL) {
+ tdp->i_effnlink++;
+ tdp->i_nlink++;
+ DIP_SET(tdp, i_nlink, tdp->i_nlink);
+ tdp->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(tdvp))
+ softdep_change_linkcnt(tdp);
+ error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
+ DOINGASYNC(tdvp)));
+ /* Don't go to bad here as the new link exists. */
+ if (error)
+ goto unlockout;
+ }
+ fip->i_offset = mastertemplate.dot_reclen;
+ ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
+ cache_purge(fdvp);
+ }
+ error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
+
+unlockout:
+ vput(fdvp);
+ vput(fvp);
+ if (tvp)
+ vput(tvp);
+ /*
+ * If compaction or fsync was requested do it now that other locks
+ * are no longer needed.
+ */
+ if (error == 0 && endoff != 0) {
+#ifdef UFS_DIRHASH
+ if (tdp->i_dirhash != NULL)
+ ufsdirhash_dirtrunc(tdp, endoff);
+#endif
+ UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred,
+ td);
+ }
+ if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
+ error = VOP_FSYNC(tdvp, MNT_WAIT, td);
+ vput(tdvp);
+ if (mp)
+ vfs_unbusy(mp);
return (error);
bad:
- if (xp)
- vput(ITOV(xp));
- vput(ITOV(dp));
-out:
- if (doingdirectory)
- ip->i_flag &= ~IN_RENAME;
- if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
- ip->i_effnlink--;
- ip->i_nlink--;
- DIP_SET(ip, i_nlink, ip->i_nlink);
- ip->i_flag |= IN_CHANGE;
- ip->i_flag &= ~IN_RENAME;
- if (DOINGSOFTDEP(fvp))
- softdep_change_linkcnt(ip);
- vput(fvp);
- } else
- vrele(fvp);
+ fip->i_effnlink--;
+ fip->i_nlink--;
+ DIP_SET(fip, i_nlink, fip->i_nlink);
+ fip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(fvp))
+ softdep_change_linkcnt(fip);
+ goto unlockout;
+
+releout:
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable
mailing list