svn commit: r347151 - in head: libexec/rtld-elf sys/compat/linux sys/fs/deadfs sys/fs/nfsclient sys/fs/nullfs sys/fs/unionfs sys/kern sys/sys sys/ufs/ufs sys/vm
Konstantin Belousov
kib at FreeBSD.org
Sun May 5 11:20:48 UTC 2019
Author: kib
Date: Sun May 5 11:20:43 2019
New Revision: 347151
URL: https://svnweb.freebsd.org/changeset/base/347151
Log:
Switch to use shared vnode locks for text files during image activation.
kern_execve() locks text vnode exclusive to be able to set and clear
VV_TEXT flag. VV_TEXT is mutually exclusive with the v_writecount > 0
condition.
The change removes VV_TEXT, replacing it with the condition
v_writecount <= -1, and puts v_writecount under the vnode interlock.
Each text reference decrements v_writecount. To clear the text
reference when the segment is unmapped, it is recorded in the
vm_map_entry backed by the text file as MAP_ENTRY_VN_TEXT flag, and
v_writecount is incremented on the map entry removal
The operations like VOP_ADD_WRITECOUNT() and VOP_SET_TEXT() check that
v_writecount does not contradict the desired change. vn_writecheck()
is now racy and its use was eliminated everywhere except access.
Atomic check for writeability and increment of v_writecount is
performed by the VOP. vn_truncate() now increments v_writecount
around VOP_SETATTR() call, lack of which is arguably a bug on its own.
nullfs bypasses v_writecount to the lower vnode always, so nullfs
vnode has its own v_writecount correct, and lower vnode gets all
references, since object->handle is always lower vnode.
On the text vnode' vm object dealloc, the v_writecount value is reset
to zero, and deadfs vop_unset_text short-circuit the operation.
Reclamation of lowervp always reclaims all nullfs vnodes referencing
lowervp first, so no stray references are left.
Reviewed by: markj, trasz
Tested by: mjg, pho
Sponsored by: The FreeBSD Foundation
MFC after: 1 month
Differential revision: https://reviews.freebsd.org/D19923
Modified:
head/libexec/rtld-elf/rtld.c
head/sys/compat/linux/linux_misc.c
head/sys/fs/deadfs/dead_vnops.c
head/sys/fs/nfsclient/nfs_clbio.c
head/sys/fs/nfsclient/nfs_clvnops.c
head/sys/fs/nullfs/null_vnops.c
head/sys/fs/unionfs/union_subr.c
head/sys/kern/imgact_aout.c
head/sys/kern/imgact_elf.c
head/sys/kern/kern_exec.c
head/sys/kern/vfs_default.c
head/sys/kern/vfs_subr.c
head/sys/kern/vfs_vnops.c
head/sys/kern/vnode_if.src
head/sys/sys/imgact.h
head/sys/sys/vnode.h
head/sys/ufs/ufs/ufs_extattr.c
head/sys/vm/vm_fault.c
head/sys/vm/vm_map.c
head/sys/vm/vm_map.h
head/sys/vm/vm_mmap.c
head/sys/vm/vm_object.c
head/sys/vm/vnode_pager.c
Modified: head/libexec/rtld-elf/rtld.c
==============================================================================
--- head/libexec/rtld-elf/rtld.c Sun May 5 11:06:19 2019 (r347150)
+++ head/libexec/rtld-elf/rtld.c Sun May 5 11:20:43 2019 (r347151)
@@ -458,7 +458,7 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entr
* others x bit is enabled.
* mmap(2) does not allow to mmap with PROT_EXEC if
* binary' file comes from noexec mount. We cannot
- * set VV_TEXT on the binary.
+ * set a text reference on the binary.
*/
dir_enable = false;
if (st.st_uid == geteuid()) {
Modified: head/sys/compat/linux/linux_misc.c
==============================================================================
--- head/sys/compat/linux/linux_misc.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/compat/linux/linux_misc.c Sun May 5 11:20:43 2019 (r347151)
@@ -258,13 +258,16 @@ linux_uselib(struct thread *td, struct linux_uselib_ar
struct nameidata ni;
struct vnode *vp;
struct exec *a_out;
+ vm_map_t map;
+ vm_map_entry_t entry;
struct vattr attr;
vm_offset_t vmaddr;
unsigned long file_offset;
unsigned long bss_size;
char *library;
ssize_t aresid;
- int error, locked, writecount;
+ int error;
+ bool locked, opened, textset;
LCONVPATHEXIST(td, args->library, &library);
@@ -274,8 +277,10 @@ linux_uselib(struct thread *td, struct linux_uselib_ar
#endif
a_out = NULL;
- locked = 0;
vp = NULL;
+ locked = false;
+ textset = false;
+ opened = false;
NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
UIO_SYSSPACE, library, td);
@@ -291,17 +296,8 @@ linux_uselib(struct thread *td, struct linux_uselib_ar
* From here on down, we have a locked vnode that must be unlocked.
* XXX: The code below largely duplicates exec_check_permissions().
*/
- locked = 1;
+ locked = true;
- /* Writable? */
- error = VOP_GET_WRITECOUNT(vp, &writecount);
- if (error != 0)
- goto cleanup;
- if (writecount != 0) {
- error = ETXTBSY;
- goto cleanup;
- }
-
/* Executable? */
error = VOP_GETATTR(vp, &attr, td->td_ucred);
if (error)
@@ -339,6 +335,7 @@ linux_uselib(struct thread *td, struct linux_uselib_ar
error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
if (error)
goto cleanup;
+ opened = true;
/* Pull in executable header into exec_map */
error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
@@ -401,15 +398,16 @@ linux_uselib(struct thread *td, struct linux_uselib_ar
/*
* Prevent more writers.
- * XXX: Note that if any of the VM operations fail below we don't
- * clear this flag.
*/
- VOP_SET_TEXT(vp);
+ error = VOP_SET_TEXT(vp);
+ if (error != 0)
+ goto cleanup;
+ textset = true;
/*
* Lock no longer needed
*/
- locked = 0;
+ locked = false;
VOP_UNLOCK(vp, 0);
/*
@@ -456,11 +454,21 @@ linux_uselib(struct thread *td, struct linux_uselib_ar
* Map it all into the process's space as a single
* copy-on-write "data" segment.
*/
- error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
+ map = &td->td_proc->p_vmspace->vm_map;
+ error = vm_mmap(map, &vmaddr,
a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
if (error)
goto cleanup;
+ vm_map_lock(map);
+ if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
+ vm_map_unlock(map);
+ error = EDOOFUS;
+ goto cleanup;
+ }
+ entry->eflags |= MAP_ENTRY_VN_EXEC;
+ vm_map_unlock(map);
+ textset = false;
}
#ifdef DEBUG
printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0],
@@ -480,7 +488,14 @@ linux_uselib(struct thread *td, struct linux_uselib_ar
}
cleanup:
- /* Unlock vnode if needed */
+ if (opened) {
+ if (locked)
+ VOP_UNLOCK(vp, 0);
+ locked = false;
+ VOP_CLOSE(vp, FREAD, td->td_ucred, td);
+ }
+ if (textset)
+ VOP_UNSET_TEXT_CHECKED(vp);
if (locked)
VOP_UNLOCK(vp, 0);
Modified: head/sys/fs/deadfs/dead_vnops.c
==============================================================================
--- head/sys/fs/deadfs/dead_vnops.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/fs/deadfs/dead_vnops.c Sun May 5 11:20:43 2019 (r347151)
@@ -47,6 +47,7 @@ static vop_lookup_t dead_lookup;
static vop_open_t dead_open;
static vop_getwritemount_t dead_getwritemount;
static vop_rename_t dead_rename;
+static vop_unset_text_t dead_unset_text;
struct vop_vector dead_vnodeops = {
.vop_default = &default_vnodeops,
@@ -76,6 +77,7 @@ struct vop_vector dead_vnodeops = {
.vop_setattr = VOP_EBADF,
.vop_symlink = VOP_PANIC,
.vop_vptocnp = VOP_EBADF,
+ .vop_unset_text = dead_unset_text,
.vop_write = dead_write,
};
@@ -147,4 +149,11 @@ dead_rename(struct vop_rename_args *ap)
vop_rename_fail(ap);
return (EXDEV);
+}
+
+static int
+dead_unset_text(struct vop_unset_text_args *ap)
+{
+
+ return (0);
}
Modified: head/sys/fs/nfsclient/nfs_clbio.c
==============================================================================
--- head/sys/fs/nfsclient/nfs_clbio.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/fs/nfsclient/nfs_clbio.c Sun May 5 11:20:43 2019 (r347151)
@@ -1639,7 +1639,7 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucre
}
}
/* ASSERT_VOP_LOCKED(vp, "ncl_doio"); */
- if (p && (vp->v_vflag & VV_TEXT)) {
+ if (p && vp->v_writecount <= -1) {
mtx_lock(&np->n_mtx);
if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.na_mtime)) {
mtx_unlock(&np->n_mtx);
Modified: head/sys/fs/nfsclient/nfs_clvnops.c
==============================================================================
--- head/sys/fs/nfsclient/nfs_clvnops.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/fs/nfsclient/nfs_clvnops.c Sun May 5 11:20:43 2019 (r347151)
@@ -3442,8 +3442,7 @@ nfs_set_text(struct vop_set_text_args *ap)
np->n_mtime = np->n_vattr.na_mtime;
mtx_unlock(&np->n_mtx);
- vp->v_vflag |= VV_TEXT;
- return (0);
+ return (vop_stdset_text(ap));
}
/*
Modified: head/sys/fs/nullfs/null_vnops.c
==============================================================================
--- head/sys/fs/nullfs/null_vnops.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/fs/nullfs/null_vnops.c Sun May 5 11:20:43 2019 (r347151)
@@ -339,15 +339,15 @@ null_add_writecount(struct vop_add_writecount_args *ap
vp = ap->a_vp;
lvp = NULLVPTOLOWERVP(vp);
- KASSERT(vp->v_writecount + ap->a_inc >= 0, ("wrong writecount inc"));
- if (vp->v_writecount > 0 && vp->v_writecount + ap->a_inc == 0)
- error = VOP_ADD_WRITECOUNT(lvp, -1);
- else if (vp->v_writecount == 0 && vp->v_writecount + ap->a_inc > 0)
- error = VOP_ADD_WRITECOUNT(lvp, 1);
- else
- error = 0;
+ VI_LOCK(vp);
+ /* text refs are bypassed to lowervp */
+ VNASSERT(vp->v_writecount >= 0, vp, ("wrong null writecount"));
+ VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp,
+ ("wrong writecount inc %d", ap->a_inc));
+ error = VOP_ADD_WRITECOUNT(lvp, ap->a_inc);
if (error == 0)
vp->v_writecount += ap->a_inc;
+ VI_UNLOCK(vp);
return (error);
}
@@ -802,15 +802,17 @@ null_reclaim(struct vop_reclaim_args *ap)
vp->v_data = NULL;
vp->v_object = NULL;
vp->v_vnlock = &vp->v_lock;
- VI_UNLOCK(vp);
/*
- * If we were opened for write, we leased one write reference
+ * If we were opened for write, we leased the write reference
* to the lower vnode. If this is a reclamation due to the
* forced unmount, undo the reference now.
*/
if (vp->v_writecount > 0)
- VOP_ADD_WRITECOUNT(lowervp, -1);
+ VOP_ADD_WRITECOUNT(lowervp, -vp->v_writecount);
+
+ VI_UNLOCK(vp);
+
if ((xp->null_flags & NULLV_NOUNLOCK) != 0)
vunref(lowervp);
else
Modified: head/sys/fs/unionfs/union_subr.c
==============================================================================
--- head/sys/fs/unionfs/union_subr.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/fs/unionfs/union_subr.c Sun May 5 11:20:43 2019 (r347151)
@@ -941,10 +941,14 @@ unionfs_vn_create_on_upper(struct vnode **vpp, struct
vput(vp);
goto unionfs_vn_create_on_upper_free_out1;
}
- VOP_ADD_WRITECOUNT(vp, 1);
+ error = VOP_ADD_WRITECOUNT(vp, 1);
CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp,
vp->v_writecount);
- *vpp = vp;
+ if (error == 0) {
+ *vpp = vp;
+ } else {
+ VOP_CLOSE(vp, fmode, cred, td);
+ }
unionfs_vn_create_on_upper_free_out1:
VOP_UNLOCK(udvp, LK_RELEASE);
@@ -1078,7 +1082,7 @@ unionfs_copyfile(struct unionfs_node *unp, int docopy,
}
}
VOP_CLOSE(uvp, FWRITE, cred, td);
- VOP_ADD_WRITECOUNT(uvp, -1);
+ VOP_ADD_WRITECOUNT_CHECKED(uvp, -1);
CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp,
uvp->v_writecount);
Modified: head/sys/kern/imgact_aout.c
==============================================================================
--- head/sys/kern/imgact_aout.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/kern/imgact_aout.c Sun May 5 11:20:43 2019 (r347151)
@@ -247,8 +247,8 @@ exec_aout_imgact(struct image_params *imgp)
/* data + bss can't exceed rlimit */
a_out->a_data + bss_size > lim_cur_proc(imgp->proc, RLIMIT_DATA) ||
racct_set(imgp->proc, RACCT_DATA, a_out->a_data + bss_size) != 0) {
- PROC_UNLOCK(imgp->proc);
- return (ENOMEM);
+ PROC_UNLOCK(imgp->proc);
+ return (ENOMEM);
}
PROC_UNLOCK(imgp->proc);
@@ -267,7 +267,7 @@ exec_aout_imgact(struct image_params *imgp)
*/
error = exec_new_vmspace(imgp, &aout_sysvec);
- vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
if (error)
return (error);
@@ -286,12 +286,13 @@ exec_aout_imgact(struct image_params *imgp)
file_offset,
virtual_offset, text_end,
VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL,
- MAP_COPY_ON_WRITE | MAP_PREFAULT);
+ MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_VN_EXEC);
if (error) {
vm_map_unlock(map);
vm_object_deallocate(object);
return (error);
}
+ VOP_SET_TEXT_CHECKED(imgp->vp);
data_end = text_end + a_out->a_data;
if (a_out->a_data) {
vm_object_reference(object);
@@ -299,12 +300,13 @@ exec_aout_imgact(struct image_params *imgp)
file_offset + a_out->a_text,
text_end, data_end,
VM_PROT_ALL, VM_PROT_ALL,
- MAP_COPY_ON_WRITE | MAP_PREFAULT);
+ MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_VN_EXEC);
if (error) {
vm_map_unlock(map);
vm_object_deallocate(object);
return (error);
}
+ VOP_SET_TEXT_CHECKED(imgp->vp);
}
if (bss_size) {
Modified: head/sys/kern/imgact_elf.c
==============================================================================
--- head/sys/kern/imgact_elf.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/kern/imgact_elf.c Sun May 5 11:20:43 2019 (r347151)
@@ -526,13 +526,17 @@ __elfN(map_insert)(struct image_params *imgp, vm_map_t
} else {
vm_object_reference(object);
rv = vm_map_fixed(map, object, offset, start, end - start,
- prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL);
+ prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL |
+ (object != NULL ? MAP_VN_EXEC : 0));
if (rv != KERN_SUCCESS) {
locked = VOP_ISLOCKED(imgp->vp);
VOP_UNLOCK(imgp->vp, 0);
vm_object_deallocate(object);
vn_lock(imgp->vp, locked | LK_RETRY);
return (rv);
+ } else if (object != NULL) {
+ MPASS(imgp->vp->v_object == object);
+ VOP_SET_TEXT_CHECKED(imgp->vp);
}
}
return (KERN_SUCCESS);
@@ -589,13 +593,8 @@ __elfN(load_section)(struct image_params *imgp, vm_oof
cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
(prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);
- rv = __elfN(map_insert)(imgp, map,
- object,
- file_addr, /* file offset */
- map_addr, /* virtual start */
- map_addr + map_len,/* virtual end */
- prot,
- cow);
+ rv = __elfN(map_insert)(imgp, map, object, file_addr,
+ map_addr, map_addr + map_len, prot, cow);
if (rv != KERN_SUCCESS)
return (EINVAL);
@@ -716,7 +715,7 @@ __elfN(load_file)(struct proc *p, const char *file, u_
struct nameidata *nd;
struct vattr *attr;
struct image_params *imgp;
- u_long flags, rbase;
+ u_long rbase;
u_long base_addr = 0;
int error;
@@ -744,10 +743,8 @@ __elfN(load_file)(struct proc *p, const char *file, u_
imgp->object = NULL;
imgp->execlabel = NULL;
- flags = FOLLOW | LOCKSHARED | LOCKLEAF;
-
-again:
- NDINIT(nd, LOOKUP, flags, UIO_SYSSPACE, file, curthread);
+ NDINIT(nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, file,
+ curthread);
if ((error = namei(nd)) != 0) {
nd->ni_vp = NULL;
goto fail;
@@ -762,27 +759,6 @@ again:
if (error)
goto fail;
- /*
- * Also make certain that the interpreter stays the same,
- * so set its VV_TEXT flag, too. Since this function is only
- * used to load the interpreter, the VV_TEXT is almost always
- * already set.
- */
- if (VOP_IS_TEXT(nd->ni_vp) == 0) {
- if (VOP_ISLOCKED(nd->ni_vp) != LK_EXCLUSIVE) {
- /*
- * LK_UPGRADE could have resulted in dropping
- * the lock. Just try again from the start,
- * this time with exclusive vnode lock.
- */
- vput(nd->ni_vp);
- flags &= ~LOCKSHARED;
- goto again;
- }
-
- VOP_SET_TEXT(nd->ni_vp);
- }
-
error = exec_map_first_page(imgp);
if (error)
goto fail;
@@ -825,9 +801,11 @@ fail:
if (imgp->firstpage)
exec_unmap_first_page(imgp);
- if (nd->ni_vp)
+ if (nd->ni_vp) {
+ if (imgp->textset)
+ VOP_UNSET_TEXT_CHECKED(nd->ni_vp);
vput(nd->ni_vp);
-
+ }
free(tempdata, M_TEMP);
return (error);
@@ -961,7 +939,7 @@ __elfN(get_interp)(struct image_params *imgp, const El
if (interp == NULL) {
VOP_UNLOCK(imgp->vp, 0);
interp = malloc(interp_name_len + 1, M_TEMP, M_WAITOK);
- vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
}
error = vn_rdwr(UIO_READ, imgp->vp, interp,
interp_name_len, phdr->p_offset,
@@ -1228,7 +1206,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
maxv / 2, 1UL << flsl(maxalign));
}
- vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
if (error != 0)
goto ret;
@@ -1272,7 +1250,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
}
error = __elfN(load_interp)(imgp, brand_info, interp, &addr,
&imgp->entry_addr);
- vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
if (error != 0)
goto ret;
} else
@@ -1285,7 +1263,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *i
if (elf_auxargs == NULL) {
VOP_UNLOCK(imgp->vp, 0);
elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
- vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
}
elf_auxargs->execfd = -1;
elf_auxargs->phdr = proghdr + et_dyn_addr;
@@ -2570,7 +2548,7 @@ __elfN(parse_notes)(struct image_params *imgp, Elf_Not
if (buf == NULL) {
VOP_UNLOCK(imgp->vp, 0);
buf = malloc(pnote->p_filesz, M_TEMP, M_WAITOK);
- vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
}
error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz,
pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED,
Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/kern/kern_exec.c Sun May 5 11:20:43 2019 (r347151)
@@ -375,7 +375,6 @@ do_execve(struct thread *td, struct image_args *args,
#endif
struct vnode *oldtextvp = NULL, *newtextvp;
int credential_changing;
- int textset;
#ifdef MAC
struct label *interpvplabel = NULL;
int will_transition;
@@ -423,8 +422,8 @@ do_execve(struct thread *td, struct image_args *args,
* interpreter if this is an interpreted binary.
*/
if (args->fname != NULL) {
- NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME
- | AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
+ NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW |
+ SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
}
SDT_PROBE1(proc, , , exec, args->fname);
@@ -457,13 +456,14 @@ interpret:
error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp);
if (error)
goto exec_fail;
- vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(newtextvp, LK_SHARED | LK_RETRY);
AUDIT_ARG_VNODE1(newtextvp);
imgp->vp = newtextvp;
}
/*
- * Check file permissions (also 'opens' file)
+ * Check file permissions. Also 'opens' file and sets its vnode to
+ * text mode.
*/
error = exec_check_permissions(imgp);
if (error)
@@ -473,16 +473,6 @@ interpret:
if (imgp->object != NULL)
vm_object_reference(imgp->object);
- /*
- * Set VV_TEXT now so no one can write to the executable while we're
- * activating it.
- *
- * Remember if this was set before and unset it in case this is not
- * actually an executable image.
- */
- textset = VOP_IS_TEXT(imgp->vp);
- VOP_SET_TEXT(imgp->vp);
-
error = exec_map_first_page(imgp);
if (error)
goto exec_fail_dealloc;
@@ -610,11 +600,8 @@ interpret:
}
if (error) {
- if (error == -1) {
- if (textset == 0)
- VOP_UNSET_TEXT(imgp->vp);
+ if (error == -1)
error = ENOEXEC;
- }
goto exec_fail_dealloc;
}
@@ -625,12 +612,13 @@ interpret:
if (imgp->interpreted) {
exec_unmap_first_page(imgp);
/*
- * VV_TEXT needs to be unset for scripts. There is a short
- * period before we determine that something is a script where
- * VV_TEXT will be set. The vnode lock is held over this
- * entire period so nothing should illegitimately be blocked.
+ * The text reference needs to be removed for scripts.
+ * There is a short period before we determine that
+ * something is a script where text reference is active.
+ * The vnode lock is held over this entire period
+ * so nothing should illegitimately be blocked.
*/
- VOP_UNSET_TEXT(imgp->vp);
+ VOP_UNSET_TEXT_CHECKED(imgp->vp);
/* free name buffer and old vnode */
if (args->fname != NULL)
NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -886,6 +874,8 @@ exec_fail_dealloc:
NDFREE(&nd, NDF_ONLY_PNBUF);
if (imgp->opened)
VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
+ if (imgp->textset)
+ VOP_UNSET_TEXT_CHECKED(imgp->vp);
if (error != 0)
vput(imgp->vp);
else
@@ -1706,7 +1696,7 @@ exec_check_permissions(struct image_params *imgp)
struct vnode *vp = imgp->vp;
struct vattr *attr = imgp->attr;
struct thread *td;
- int error, writecount;
+ int error;
td = curthread;
@@ -1750,12 +1740,17 @@ exec_check_permissions(struct image_params *imgp)
/*
* Check number of open-for-writes on the file and deny execution
* if there are any.
+ *
+ * Add a text reference now so no one can write to the
+ * executable while we're activating it.
+ *
+ * Remember if this was set before and unset it in case this is not
+ * actually an executable image.
*/
- error = VOP_GET_WRITECOUNT(vp, &writecount);
+ error = VOP_SET_TEXT(vp);
if (error != 0)
return (error);
- if (writecount != 0)
- return (ETXTBSY);
+ imgp->textset = true;
/*
* Call filesystem specific open routine (which does nothing in the
Modified: head/sys/kern/vfs_default.c
==============================================================================
--- head/sys/kern/vfs_default.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/kern/vfs_default.c Sun May 5 11:20:43 2019 (r347151)
@@ -81,9 +81,7 @@ static int dirent_exists(struct vnode *vp, const char
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
static int vop_stdis_text(struct vop_is_text_args *ap);
-static int vop_stdset_text(struct vop_set_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
-static int vop_stdget_writecount(struct vop_get_writecount_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
@@ -141,7 +139,6 @@ struct vop_vector default_vnodeops = {
.vop_is_text = vop_stdis_text,
.vop_set_text = vop_stdset_text,
.vop_unset_text = vop_stdunset_text,
- .vop_get_writecount = vop_stdget_writecount,
.vop_add_writecount = vop_stdadd_writecount,
};
@@ -1070,39 +1067,63 @@ static int
vop_stdis_text(struct vop_is_text_args *ap)
{
- return ((ap->a_vp->v_vflag & VV_TEXT) != 0);
+ return (ap->a_vp->v_writecount < 0);
}
-static int
+int
vop_stdset_text(struct vop_set_text_args *ap)
{
+ struct vnode *vp;
+ int error;
- ap->a_vp->v_vflag |= VV_TEXT;
- return (0);
+ vp = ap->a_vp;
+ VI_LOCK(vp);
+ if (vp->v_writecount > 0) {
+ error = ETXTBSY;
+ } else {
+ vp->v_writecount--;
+ error = 0;
+ }
+ VI_UNLOCK(vp);
+ return (error);
}
static int
vop_stdunset_text(struct vop_unset_text_args *ap)
{
+ struct vnode *vp;
+ int error;
- ap->a_vp->v_vflag &= ~VV_TEXT;
- return (0);
+ vp = ap->a_vp;
+ VI_LOCK(vp);
+ if (vp->v_writecount < 0) {
+ vp->v_writecount++;
+ error = 0;
+ } else {
+ error = EINVAL;
+ }
+ VI_UNLOCK(vp);
+ return (error);
}
static int
-vop_stdget_writecount(struct vop_get_writecount_args *ap)
-{
-
- *ap->a_writecount = ap->a_vp->v_writecount;
- return (0);
-}
-
-static int
vop_stdadd_writecount(struct vop_add_writecount_args *ap)
{
+ struct vnode *vp;
+ int error;
- ap->a_vp->v_writecount += ap->a_inc;
- return (0);
+ vp = ap->a_vp;
+ VI_LOCK(vp);
+ if (vp->v_writecount < 0) {
+ error = ETXTBSY;
+ } else {
+ VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp,
+ ("neg writecount increment %d", ap->a_inc));
+ vp->v_writecount += ap->a_inc;
+ error = 0;
+ }
+ VI_UNLOCK(vp);
+ return (error);
}
/*
Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/kern/vfs_subr.c Sun May 5 11:20:43 2019 (r347151)
@@ -3491,8 +3491,6 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
strlcat(buf, "|VV_ETERNALDEV", sizeof(buf));
if (vp->v_vflag & VV_CACHEDLABEL)
strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf));
- if (vp->v_vflag & VV_TEXT)
- strlcat(buf, "|VV_TEXT", sizeof(buf));
if (vp->v_vflag & VV_COPYONWRITE)
strlcat(buf, "|VV_COPYONWRITE", sizeof(buf));
if (vp->v_vflag & VV_SYSTEM)
@@ -3508,7 +3506,7 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
if (vp->v_vflag & VV_FORCEINSMQ)
strlcat(buf, "|VV_FORCEINSMQ", sizeof(buf));
flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_ETERNALDEV |
- VV_CACHEDLABEL | VV_TEXT | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP |
+ VV_CACHEDLABEL | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP |
VV_NOKNOTE | VV_DELETED | VV_MD | VV_FORCEINSMQ);
if (flags != 0) {
snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags);
Modified: head/sys/kern/vfs_vnops.c
==============================================================================
--- head/sys/kern/vfs_vnops.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/kern/vfs_vnops.c Sun May 5 11:20:43 2019 (r347151)
@@ -294,6 +294,39 @@ bad:
return (error);
}
+static int
+vn_open_vnode_advlock(struct vnode *vp, int fmode, struct file *fp)
+{
+ struct flock lf;
+ int error, lock_flags, type;
+
+ ASSERT_VOP_LOCKED(vp, "vn_open_vnode_advlock");
+ if ((fmode & (O_EXLOCK | O_SHLOCK)) == 0)
+ return (0);
+ KASSERT(fp != NULL, ("open with flock requires fp"));
+ if (fp->f_type != DTYPE_NONE && fp->f_type != DTYPE_VNODE)
+ return (EOPNOTSUPP);
+
+ lock_flags = VOP_ISLOCKED(vp);
+ VOP_UNLOCK(vp, 0);
+
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = (fmode & O_EXLOCK) != 0 ? F_WRLCK : F_RDLCK;
+ type = F_FLOCK;
+ if ((fmode & FNONBLOCK) == 0)
+ type |= F_WAIT;
+ error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
+ if (error == 0)
+ fp->f_flag |= FHASLOCK;
+
+ vn_lock(vp, lock_flags | LK_RETRY);
+ if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
+ error = ENOENT;
+ return (error);
+}
+
/*
* Common code for vnode open operations once a vnode is located.
* Check permissions, and call the VOP_OPEN routine.
@@ -303,8 +336,7 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucre
struct thread *td, struct file *fp)
{
accmode_t accmode;
- struct flock lf;
- int error, lock_flags, type;
+ int error;
if (vp->v_type == VLNK)
return (EMLINK);
@@ -335,63 +367,31 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucre
accmode &= ~(VCREAT | VVERIFY);
#endif
- if ((fmode & O_CREAT) == 0) {
- if (accmode & VWRITE) {
- error = vn_writechk(vp);
- if (error)
- return (error);
- }
- if (accmode) {
- error = VOP_ACCESS(vp, accmode, cred, td);
- if (error)
- return (error);
- }
+ if ((fmode & O_CREAT) == 0 && accmode != 0) {
+ error = VOP_ACCESS(vp, accmode, cred, td);
+ if (error != 0)
+ return (error);
}
if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
vn_lock(vp, LK_UPGRADE | LK_RETRY);
- if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0)
+ error = VOP_OPEN(vp, fmode, cred, td, fp);
+ if (error != 0)
return (error);
- while ((fmode & (O_EXLOCK | O_SHLOCK)) != 0) {
- KASSERT(fp != NULL, ("open with flock requires fp"));
- if (fp->f_type != DTYPE_NONE && fp->f_type != DTYPE_VNODE) {
- error = EOPNOTSUPP;
- break;
+ error = vn_open_vnode_advlock(vp, fmode, fp);
+ if (error == 0 && (fmode & FWRITE) != 0) {
+ error = VOP_ADD_WRITECOUNT(vp, 1);
+ if (error == 0) {
+ CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
+ __func__, vp, vp->v_writecount);
}
- lock_flags = VOP_ISLOCKED(vp);
- VOP_UNLOCK(vp, 0);
- lf.l_whence = SEEK_SET;
- lf.l_start = 0;
- lf.l_len = 0;
- if (fmode & O_EXLOCK)
- lf.l_type = F_WRLCK;
- else
- lf.l_type = F_RDLCK;
- type = F_FLOCK;
- if ((fmode & FNONBLOCK) == 0)
- type |= F_WAIT;
- error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
- if (error == 0)
- fp->f_flag |= FHASLOCK;
- vn_lock(vp, lock_flags | LK_RETRY);
- if (error != 0)
- break;
- if ((vp->v_iflag & VI_DOOMED) != 0) {
- error = ENOENT;
- break;
- }
-
- /*
- * Another thread might have used this vnode as an
- * executable while the vnode lock was dropped.
- * Ensure the vnode is still able to be opened for
- * writing after the lock has been obtained.
- */
- if ((accmode & VWRITE) != 0)
- error = vn_writechk(vp);
- break;
}
+ /*
+ * Error from advlock or VOP_ADD_WRITECOUNT() still requires
+ * calling VOP_CLOSE() to pair with earlier VOP_OPEN().
+ * Arrange for that by having fdrop() to use vn_closefile().
+ */
if (error != 0) {
fp->f_flag |= FOPENFAILED;
fp->f_vnode = vp;
@@ -400,18 +400,17 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucre
fp->f_ops = &vnops;
}
vref(vp);
- } else if ((fmode & FWRITE) != 0) {
- VOP_ADD_WRITECOUNT(vp, 1);
- CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
- __func__, vp, vp->v_writecount);
}
+
ASSERT_VOP_LOCKED(vp, "vn_open_vnode");
return (error);
+
}
/*
* Check for write permissions on the specified vnode.
* Prototype text segments cannot be written.
+ * It is racy.
*/
int
vn_writechk(struct vnode *vp)
@@ -449,9 +448,7 @@ vn_close1(struct vnode *vp, int flags, struct ucred *f
vn_lock(vp, lock_flags | LK_RETRY);
AUDIT_ARG_VNODE1(vp);
if ((flags & (FWRITE | FOPENFAILED)) == FWRITE) {
- VNASSERT(vp->v_writecount > 0, vp,
- ("vn_close: negative writecount"));
- VOP_ADD_WRITECOUNT(vp, -1);
+ VOP_ADD_WRITECOUNT_CHECKED(vp, -1);
CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
__func__, vp, vp->v_writecount);
}
@@ -1319,13 +1316,14 @@ vn_truncate(struct file *fp, off_t length, struct ucre
if (error)
goto out;
#endif
- error = vn_writechk(vp);
+ error = VOP_ADD_WRITECOUNT(vp, 1);
if (error == 0) {
VATTR_NULL(&vattr);
vattr.va_size = length;
if ((fp->f_flag & O_FSYNC) != 0)
vattr.va_vaflags |= VA_SYNC;
error = VOP_SETATTR(vp, &vattr, fp->f_cred);
+ VOP_ADD_WRITECOUNT_CHECKED(vp, -1);
}
out:
VOP_UNLOCK(vp, 0);
Modified: head/sys/kern/vnode_if.src
==============================================================================
--- head/sys/kern/vnode_if.src Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/kern/vnode_if.src Sun May 5 11:20:43 2019 (r347151)
@@ -688,29 +688,21 @@ vop_is_text {
};
-%% set_text vp E E E
+%% set_text vp = = =
vop_set_text {
IN struct vnode *vp;
};
-%% vop_unset_text vp E E E
+%% vop_unset_text vp = = =
vop_unset_text {
IN struct vnode *vp;
};
-%% get_writecount vp L L L
-
-vop_get_writecount {
- IN struct vnode *vp;
- OUT int *writecount;
-};
-
-
-%% add_writecount vp E E E
+%% add_writecount vp L L L
vop_add_writecount {
IN struct vnode *vp;
Modified: head/sys/sys/imgact.h
==============================================================================
--- head/sys/sys/imgact.h Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/sys/imgact.h Sun May 5 11:20:43 2019 (r347151)
@@ -89,6 +89,7 @@ struct image_params {
u_long stack_sz;
struct ucred *newcred; /* new credentials if changing */
bool credential_setid; /* true if becoming setid */
+ bool textset;
u_int map_flags;
};
Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/sys/vnode.h Sun May 5 11:20:43 2019 (r347151)
@@ -169,7 +169,8 @@ struct vnode {
u_int v_iflag; /* i vnode flags (see below) */
u_int v_vflag; /* v vnode flags */
u_int v_mflag; /* l mnt-specific vnode flags */
- int v_writecount; /* v ref count of writers */
+ int v_writecount; /* I ref count of writers or
+ (negative) text users */
u_int v_hash;
enum vtype v_type; /* u vnode type */
};
@@ -244,7 +245,6 @@ struct xvnode {
#define VV_NOSYNC 0x0004 /* unlinked, stop syncing */
#define VV_ETERNALDEV 0x0008 /* device that is never destroyed */
#define VV_CACHEDLABEL 0x0010 /* Vnode has valid cached MAC label */
-#define VV_TEXT 0x0020 /* vnode is a pure text prototype */
#define VV_COPYONWRITE 0x0040 /* vnode is doing copy-on-write */
#define VV_SYSTEM 0x0080 /* vnode being used by kernel */
#define VV_PROCDEP 0x0100 /* vnode is process dependent */
@@ -749,6 +749,7 @@ int vop_stdadvlock(struct vop_advlock_args *ap);
int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
int vop_stdallocate(struct vop_allocate_args *ap);
+int vop_stdset_text(struct vop_set_text_args *ap);
int vop_stdpathconf(struct vop_pathconf_args *);
int vop_stdpoll(struct vop_poll_args *);
int vop_stdvptocnp(struct vop_vptocnp_args *ap);
@@ -828,6 +829,33 @@ void vop_rename_fail(struct vop_rename_args *ap);
#define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__)
+#ifdef INVARIANTS
+#define VOP_ADD_WRITECOUNT_CHECKED(vp, cnt) \
+do { \
+ int error_; \
+ \
+ error_ = VOP_ADD_WRITECOUNT((vp), (cnt)); \
+ MPASS(error_ == 0); \
+} while (0)
+#define VOP_SET_TEXT_CHECKED(vp) \
+do { \
+ int error_; \
+ \
+ error_ = VOP_SET_TEXT((vp)); \
+ MPASS(error_ == 0); \
+} while (0)
+#define VOP_UNSET_TEXT_CHECKED(vp) \
+do { \
+ int error_; \
+ \
+ error_ = VOP_UNSET_TEXT((vp)); \
+ MPASS(error_ == 0); \
+} while (0)
+#else
+#define VOP_ADD_WRITECOUNT_CHECKED(vp, cnt) VOP_ADD_WRITECOUNT((vp), (cnt))
+#define VOP_SET_TEXT_CHECKED(vp) VOP_SET_TEXT((vp))
+#define VOP_UNSET_TEXT_CHECKED(vp) VOP_UNSET_TEXT((vp))
+#endif
void vput(struct vnode *vp);
void vrele(struct vnode *vp);
Modified: head/sys/ufs/ufs/ufs_extattr.c
==============================================================================
--- head/sys/ufs/ufs/ufs_extattr.c Sun May 5 11:06:19 2019 (r347150)
+++ head/sys/ufs/ufs/ufs_extattr.c Sun May 5 11:20:43 2019 (r347151)
@@ -338,7 +338,12 @@ ufs_extattr_enable_with_open(struct ufsmount *ump, str
return (error);
}
- VOP_ADD_WRITECOUNT(vp, 1);
+ error = VOP_ADD_WRITECOUNT(vp, 1);
+ if (error != 0) {
+ VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list