svn commit: r342628 - in head/sys/compat/linuxkpi/common: include/linux src
Konstantin Belousov
kib at FreeBSD.org
Sun Dec 30 15:46:47 UTC 2018
Author: kib
Date: Sun Dec 30 15:46:45 2018
New Revision: 342628
URL: https://svnweb.freebsd.org/changeset/base/342628
Log:
Fix linux_destroy_dev() behaviour when there are still files open from
the destroying cdev.
Currently linux_destroy_dev() waits for the reference count on the
linux cdev to drain, and each open file hold the reference.
Practically it means that linux_destroy_dev() is blocked until all
userspace processes that have the cdev open, exit. FreeBSD devfs does
not have such problem, because device refcount only prevents freeing
of the cdev memory, and separate 'active methods' counter blocks
destroy_dev() until all threads leave the cdevsw methods. After that,
attempts to enter cdevsw methods are refused with an error.
Implement somewhat similar mechanism for LinuxKPI cdevs. Demote cdev
refcount to only mean a hold on the linux cdev memory. Add sirefs
count to track both number of threads inside the cdev methods, and for
single-bit indicator that cdev is being destroyed. In the later case,
the call is redirected to the dummy cdev.
Reviewed by: markj
Discussed with: hselasky
Tested by: zeising
MFC after: 1 week
Sponsored by: Mellanox Technologies
Differential revision: https://reviews.freebsd.org/D18606
Modified:
head/sys/compat/linuxkpi/common/include/linux/cdev.h
head/sys/compat/linuxkpi/common/src/linux_compat.c
Modified: head/sys/compat/linuxkpi/common/include/linux/cdev.h
==============================================================================
--- head/sys/compat/linuxkpi/common/include/linux/cdev.h Sun Dec 30 15:38:07 2018 (r342627)
+++ head/sys/compat/linuxkpi/common/include/linux/cdev.h Sun Dec 30 15:46:45 2018 (r342628)
@@ -52,7 +52,8 @@ struct linux_cdev {
struct cdev *cdev;
dev_t dev;
const struct file_operations *ops;
- atomic_long_t refs;
+ u_int refs;
+ u_int siref;
};
static inline void
@@ -61,7 +62,7 @@ cdev_init(struct linux_cdev *cdev, const struct file_o
kobject_init(&cdev->kobj, &linux_cdev_static_ktype);
cdev->ops = ops;
- atomic_long_set(&cdev->refs, 0);
+ cdev->refs = 1;
}
static inline struct linux_cdev *
@@ -70,8 +71,9 @@ cdev_alloc(void)
struct linux_cdev *cdev;
cdev = kzalloc(sizeof(struct linux_cdev), M_WAITOK);
- if (cdev)
+ if (cdev != NULL)
kobject_init(&cdev->kobj, &linux_cdev_ktype);
+ cdev->refs = 1;
return (cdev);
}
Modified: head/sys/compat/linuxkpi/common/src/linux_compat.c
==============================================================================
--- head/sys/compat/linuxkpi/common/src/linux_compat.c Sun Dec 30 15:38:07 2018 (r342627)
+++ head/sys/compat/linuxkpi/common/src/linux_compat.c Sun Dec 30 15:46:45 2018 (r342628)
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/sglist.h>
#include <sys/sleepqueue.h>
+#include <sys/refcount.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bus.h>
@@ -100,6 +101,7 @@ MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compa
#undef cdev
#define RB_ROOT(head) (head)->rbh_root
+static void linux_cdev_deref(struct linux_cdev *ldev);
static struct vm_area_struct *linux_cdev_handle_find(void *handle);
struct kobject linux_class_root;
@@ -691,6 +693,52 @@ zap_vma_ptes(struct vm_area_struct *vma, unsigned long
return (0);
}
+static struct file_operations dummy_ldev_ops = {
+ /* XXXKIB */
+};
+
+static struct linux_cdev dummy_ldev = {
+ .ops = &dummy_ldev_ops,
+};
+
+#define LDEV_SI_DTR 0x0001
+#define LDEV_SI_REF 0x0002
+
+static void
+linux_get_fop(struct linux_file *filp, const struct file_operations **fop,
+ struct linux_cdev **dev)
+{
+ struct linux_cdev *ldev;
+ u_int siref;
+
+ ldev = filp->f_cdev;
+ *fop = filp->f_op;
+ if (ldev != NULL) {
+ for (siref = ldev->siref;;) {
+ if ((siref & LDEV_SI_DTR) != 0) {
+ ldev = &dummy_ldev;
+ siref = ldev->siref;
+ *fop = ldev->ops;
+ MPASS((ldev->siref & LDEV_SI_DTR) == 0);
+ } else if (atomic_fcmpset_int(&ldev->siref, &siref,
+ siref + LDEV_SI_REF)) {
+ break;
+ }
+ }
+ }
+ *dev = ldev;
+}
+
+static void
+linux_drop_fop(struct linux_cdev *ldev)
+{
+
+ if (ldev == NULL)
+ return;
+ MPASS((ldev->siref & ~LDEV_SI_DTR) != 0);
+ atomic_subtract_int(&ldev->siref, LDEV_SI_REF);
+}
+
#define OPW(fp,td,code) ({ \
struct file *__fpop; \
__typeof(code) __retval; \
@@ -703,10 +751,12 @@ zap_vma_ptes(struct vm_area_struct *vma, unsigned long
})
static int
-linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td, struct file *file)
+linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td,
+ struct file *file)
{
struct linux_cdev *ldev;
struct linux_file *filp;
+ const struct file_operations *fop;
int error;
ldev = dev->si_drv1;
@@ -718,20 +768,17 @@ linux_dev_fdopen(struct cdev *dev, int fflags, struct
filp->f_flags = file->f_flag;
filp->f_vnode = file->f_vnode;
filp->_file = file;
+ refcount_acquire(&ldev->refs);
filp->f_cdev = ldev;
linux_set_current(td);
+ linux_get_fop(filp, &fop, &ldev);
- /* get a reference on the Linux character device */
- if (atomic_long_add_unless(&ldev->refs, 1, -1L) == 0) {
- kfree(filp);
- return (EINVAL);
- }
-
- if (filp->f_op->open) {
- error = -filp->f_op->open(file->f_vnode, filp);
- if (error) {
- atomic_long_dec(&ldev->refs);
+ if (fop->open != NULL) {
+ error = -fop->open(file->f_vnode, filp);
+ if (error != 0) {
+ linux_drop_fop(ldev);
+ linux_cdev_deref(filp->f_cdev);
kfree(filp);
return (error);
}
@@ -742,6 +789,7 @@ linux_dev_fdopen(struct cdev *dev, int fflags, struct
/* release the file from devfs */
finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops);
+ linux_drop_fop(ldev);
return (ENXIO);
}
@@ -877,7 +925,8 @@ linux_get_error(struct task_struct *task, int error)
static int
linux_file_ioctl_sub(struct file *fp, struct linux_file *filp,
- u_long cmd, caddr_t data, struct thread *td)
+ const struct file_operations *fop, u_long cmd, caddr_t data,
+ struct thread *td)
{
struct task_struct *task = current;
unsigned size;
@@ -902,20 +951,28 @@ linux_file_ioctl_sub(struct file *fp, struct linux_fil
#if defined(__amd64__)
if (td->td_proc->p_elf_machine == EM_386) {
/* try the compat IOCTL handler first */
- if (filp->f_op->compat_ioctl != NULL)
- error = -OPW(fp, td, filp->f_op->compat_ioctl(filp, cmd, (u_long)data));
- else
+ if (fop->compat_ioctl != NULL) {
+ error = -OPW(fp, td, fop->compat_ioctl(filp,
+ cmd, (u_long)data));
+ } else {
error = ENOTTY;
+ }
/* fallback to the regular IOCTL handler, if any */
- if (error == ENOTTY && filp->f_op->unlocked_ioctl != NULL)
- error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
+ if (error == ENOTTY && fop->unlocked_ioctl != NULL) {
+ error = -OPW(fp, td, fop->unlocked_ioctl(filp,
+ cmd, (u_long)data));
+ }
} else
#endif
- if (filp->f_op->unlocked_ioctl != NULL)
- error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
- else
- error = ENOTTY;
+ {
+ if (fop->unlocked_ioctl != NULL) {
+ error = -OPW(fp, td, fop->unlocked_ioctl(filp,
+ cmd, (u_long)data));
+ } else {
+ error = ENOTTY;
+ }
+ }
if (size > 0) {
task->bsd_ioctl_data = NULL;
task->bsd_ioctl_len = 0;
@@ -1084,30 +1141,36 @@ static struct filterops linux_dev_kqfiltops_write = {
static void
linux_file_kqfilter_poll(struct linux_file *filp, int kqflags)
{
+ struct thread *td;
+ const struct file_operations *fop;
+ struct linux_cdev *ldev;
int temp;
- if (filp->f_kqflags & kqflags) {
- struct thread *td = curthread;
+ if ((filp->f_kqflags & kqflags) == 0)
+ return;
- /* get the latest polling state */
- temp = OPW(filp->_file, td, filp->f_op->poll(filp, NULL));
+ td = curthread;
- spin_lock(&filp->f_kqlock);
- /* clear kqflags */
- filp->f_kqflags &= ~(LINUX_KQ_FLAG_NEED_READ |
- LINUX_KQ_FLAG_NEED_WRITE);
- /* update kqflags */
- if (temp & (POLLIN | POLLOUT)) {
- if (temp & POLLIN)
- filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ;
- if (temp & POLLOUT)
- filp->f_kqflags |= LINUX_KQ_FLAG_NEED_WRITE;
+ linux_get_fop(filp, &fop, &ldev);
+ /* get the latest polling state */
+ temp = OPW(filp->_file, td, fop->poll(filp, NULL));
+ linux_drop_fop(ldev);
- /* make sure the "knote" gets woken up */
- KNOTE_LOCKED(&filp->f_selinfo.si_note, 0);
- }
- spin_unlock(&filp->f_kqlock);
+ spin_lock(&filp->f_kqlock);
+ /* clear kqflags */
+ filp->f_kqflags &= ~(LINUX_KQ_FLAG_NEED_READ |
+ LINUX_KQ_FLAG_NEED_WRITE);
+ /* update kqflags */
+ if ((temp & (POLLIN | POLLOUT)) != 0) {
+ if ((temp & POLLIN) != 0)
+ filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ;
+ if ((temp & POLLOUT) != 0)
+ filp->f_kqflags |= LINUX_KQ_FLAG_NEED_WRITE;
+
+ /* make sure the "knote" gets woken up */
+ KNOTE_LOCKED(&filp->f_selinfo.si_note, 0);
}
+ spin_unlock(&filp->f_kqlock);
}
static int
@@ -1156,9 +1219,9 @@ linux_file_kqfilter(struct file *file, struct knote *k
}
static int
-linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset,
- vm_size_t size, struct vm_object **object, int nprot,
- struct thread *td)
+linux_file_mmap_single(struct file *fp, const struct file_operations *fop,
+ vm_ooffset_t *offset, vm_size_t size, struct vm_object **object,
+ int nprot, struct thread *td)
{
struct task_struct *task;
struct vm_area_struct *vmap;
@@ -1170,7 +1233,7 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *
filp = (struct linux_file *)fp->f_data;
filp->f_flags = fp->f_flag;
- if (filp->f_op->mmap == NULL)
+ if (fop->mmap == NULL)
return (EOPNOTSUPP);
linux_set_current(td);
@@ -1200,7 +1263,7 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *
if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) {
error = linux_get_error(task, EINTR);
} else {
- error = -OPW(fp, td, filp->f_op->mmap(filp, vmap));
+ error = -OPW(fp, td, fop->mmap(filp, vmap));
error = linux_get_error(task, error);
up_write(&vmap->vm_mm->mmap_sem);
}
@@ -1319,6 +1382,8 @@ linux_file_read(struct file *file, struct uio *uio, st
int flags, struct thread *td)
{
struct linux_file *filp;
+ const struct file_operations *fop;
+ struct linux_cdev *ldev;
ssize_t bytes;
int error;
@@ -1331,8 +1396,10 @@ linux_file_read(struct file *file, struct uio *uio, st
if (uio->uio_resid > DEVFS_IOSIZE_MAX)
return (EINVAL);
linux_set_current(td);
- if (filp->f_op->read) {
- bytes = OPW(file, td, filp->f_op->read(filp, uio->uio_iov->iov_base,
+ linux_get_fop(filp, &fop, &ldev);
+ if (fop->read != NULL) {
+ bytes = OPW(file, td, fop->read(filp,
+ uio->uio_iov->iov_base,
uio->uio_iov->iov_len, &uio->uio_offset));
if (bytes >= 0) {
uio->uio_iov->iov_base =
@@ -1347,6 +1414,7 @@ linux_file_read(struct file *file, struct uio *uio, st
/* update kqfilter status, if any */
linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ);
+ linux_drop_fop(ldev);
return (error);
}
@@ -1356,10 +1424,11 @@ linux_file_write(struct file *file, struct uio *uio, s
int flags, struct thread *td)
{
struct linux_file *filp;
+ const struct file_operations *fop;
+ struct linux_cdev *ldev;
ssize_t bytes;
int error;
- error = 0;
filp = (struct linux_file *)file->f_data;
filp->f_flags = file->f_flag;
/* XXX no support for I/O vectors currently */
@@ -1368,14 +1437,17 @@ linux_file_write(struct file *file, struct uio *uio, s
if (uio->uio_resid > DEVFS_IOSIZE_MAX)
return (EINVAL);
linux_set_current(td);
- if (filp->f_op->write) {
- bytes = OPW(file, td, filp->f_op->write(filp, uio->uio_iov->iov_base,
+ linux_get_fop(filp, &fop, &ldev);
+ if (fop->write != NULL) {
+ bytes = OPW(file, td, fop->write(filp,
+ uio->uio_iov->iov_base,
uio->uio_iov->iov_len, &uio->uio_offset));
if (bytes >= 0) {
uio->uio_iov->iov_base =
((uint8_t *)uio->uio_iov->iov_base) + bytes;
uio->uio_iov->iov_len -= bytes;
uio->uio_resid -= bytes;
+ error = 0;
} else {
error = linux_get_error(current, -bytes);
}
@@ -1385,6 +1457,8 @@ linux_file_write(struct file *file, struct uio *uio, s
/* update kqfilter status, if any */
linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_WRITE);
+ linux_drop_fop(ldev);
+
return (error);
}
@@ -1393,16 +1467,21 @@ linux_file_poll(struct file *file, int events, struct
struct thread *td)
{
struct linux_file *filp;
+ const struct file_operations *fop;
+ struct linux_cdev *ldev;
int revents;
filp = (struct linux_file *)file->f_data;
filp->f_flags = file->f_flag;
linux_set_current(td);
- if (filp->f_op->poll != NULL)
- revents = OPW(file, td, filp->f_op->poll(filp, LINUX_POLL_TABLE_NORMAL)) & events;
- else
+ linux_get_fop(filp, &fop, &ldev);
+ if (fop->poll != NULL) {
+ revents = OPW(file, td, fop->poll(filp,
+ LINUX_POLL_TABLE_NORMAL)) & events;
+ } else {
revents = 0;
-
+ }
+ linux_drop_fop(ldev);
return (revents);
}
@@ -1410,23 +1489,28 @@ static int
linux_file_close(struct file *file, struct thread *td)
{
struct linux_file *filp;
+ const struct file_operations *fop;
+ struct linux_cdev *ldev;
int error;
filp = (struct linux_file *)file->f_data;
- KASSERT(file_count(filp) == 0, ("File refcount(%d) is not zero", file_count(filp)));
+ KASSERT(file_count(filp) == 0,
+ ("File refcount(%d) is not zero", file_count(filp)));
+ error = 0;
filp->f_flags = file->f_flag;
linux_set_current(td);
linux_poll_wait_dequeue(filp);
- error = -OPW(file, td, filp->f_op->release(filp->f_vnode, filp));
+ linux_get_fop(filp, &fop, &ldev);
+ if (fop->release != NULL)
+ error = -OPW(file, td, fop->release(filp->f_vnode, filp));
funsetown(&filp->f_sigio);
if (filp->f_vnode != NULL)
vdrop(filp->f_vnode);
- if (filp->f_cdev != NULL) {
- /* put a reference on the Linux character device */
- atomic_long_dec(&filp->f_cdev->refs);
- }
+ linux_drop_fop(ldev);
+ if (filp->f_cdev != NULL)
+ linux_cdev_deref(filp->f_cdev);
kfree(filp);
return (error);
@@ -1437,27 +1521,30 @@ linux_file_ioctl(struct file *fp, u_long cmd, void *da
struct thread *td)
{
struct linux_file *filp;
+ const struct file_operations *fop;
+ struct linux_cdev *ldev;
int error;
+ error = 0;
filp = (struct linux_file *)fp->f_data;
filp->f_flags = fp->f_flag;
- error = 0;
+ linux_get_fop(filp, &fop, &ldev);
linux_set_current(td);
switch (cmd) {
case FIONBIO:
break;
case FIOASYNC:
- if (filp->f_op->fasync == NULL)
+ if (fop->fasync == NULL)
break;
- error = -OPW(fp, td, filp->f_op->fasync(0, filp, fp->f_flag & FASYNC));
+ error = -OPW(fp, td, fop->fasync(0, filp, fp->f_flag & FASYNC));
break;
case FIOSETOWN:
error = fsetown(*(int *)data, &filp->f_sigio);
if (error == 0) {
- if (filp->f_op->fasync == NULL)
+ if (fop->fasync == NULL)
break;
- error = -OPW(fp, td, filp->f_op->fasync(0, filp,
+ error = -OPW(fp, td, fop->fasync(0, filp,
fp->f_flag & FASYNC));
}
break;
@@ -1465,16 +1552,17 @@ linux_file_ioctl(struct file *fp, u_long cmd, void *da
*(int *)data = fgetown(&filp->f_sigio);
break;
default:
- error = linux_file_ioctl_sub(fp, filp, cmd, data, td);
+ error = linux_file_ioctl_sub(fp, filp, fop, cmd, data, td);
break;
}
+ linux_drop_fop(ldev);
return (error);
}
static int
linux_file_mmap_sub(struct thread *td, vm_size_t objsize, vm_prot_t prot,
vm_prot_t *maxprotp, int *flagsp, struct file *fp,
- vm_ooffset_t *foff, vm_object_t *objp)
+ vm_ooffset_t *foff, const struct file_operations *fop, vm_object_t *objp)
{
/*
* Character devices do not provide private mappings
@@ -1486,7 +1574,8 @@ linux_file_mmap_sub(struct thread *td, vm_size_t objsi
if ((*flagsp & (MAP_PRIVATE | MAP_COPY)) != 0)
return (EINVAL);
- return (linux_file_mmap_single(fp, foff, objsize, objp, (int)prot, td));
+ return (linux_file_mmap_single(fp, fop, foff, objsize, objp,
+ (int)prot, td));
}
static int
@@ -1495,6 +1584,8 @@ linux_file_mmap(struct file *fp, vm_map_t map, vm_offs
struct thread *td)
{
struct linux_file *filp;
+ const struct file_operations *fop;
+ struct linux_cdev *ldev;
struct mount *mp;
struct vnode *vp;
vm_object_t object;
@@ -1541,15 +1632,18 @@ linux_file_mmap(struct file *fp, vm_map_t map, vm_offs
}
maxprot &= cap_maxprot;
- error = linux_file_mmap_sub(td, size, prot, &maxprot, &flags, fp, &foff,
- &object);
+ linux_get_fop(filp, &fop, &ldev);
+ error = linux_file_mmap_sub(td, size, prot, &maxprot, &flags, fp,
+ &foff, fop, &object);
if (error != 0)
- return (error);
+ goto out;
error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
foff, FALSE, td);
if (error != 0)
vm_object_deallocate(object);
+out:
+ linux_drop_fop(ldev);
return (error);
}
@@ -1971,6 +2065,14 @@ linux_completion_done(struct completion *c)
}
static void
+linux_cdev_deref(struct linux_cdev *ldev)
+{
+
+ if (refcount_release(&ldev->refs))
+ kfree(ldev);
+}
+
+static void
linux_cdev_release(struct kobject *kobj)
{
struct linux_cdev *cdev;
@@ -1979,7 +2081,7 @@ linux_cdev_release(struct kobject *kobj)
cdev = container_of(kobj, struct linux_cdev, kobj);
parent = kobj->parent;
linux_destroy_dev(cdev);
- kfree(cdev);
+ linux_cdev_deref(cdev);
kobject_put(parent);
}
@@ -1996,20 +2098,19 @@ linux_cdev_static_release(struct kobject *kobj)
}
void
-linux_destroy_dev(struct linux_cdev *cdev)
+linux_destroy_dev(struct linux_cdev *ldev)
{
- if (cdev->cdev == NULL)
+ if (ldev->cdev == NULL)
return;
- atomic_long_dec(&cdev->refs);
+ MPASS((ldev->siref & LDEV_SI_DTR) == 0);
+ atomic_set_int(&ldev->siref, LDEV_SI_DTR);
+ while ((atomic_load_int(&ldev->siref) & ~LDEV_SI_DTR) != 0)
+ pause("ldevdtr", hz / 4);
- /* wait for all open files to be closed */
- while (atomic_long_read(&cdev->refs) != -1L)
- pause("ldevdrn", hz);
-
- destroy_dev(cdev->cdev);
- cdev->cdev = NULL;
+ destroy_dev(ldev->cdev);
+ ldev->cdev = NULL;
}
const struct kobj_type linux_cdev_ktype = {
More information about the svn-src-all
mailing list