svn commit: r298833 - in stable/10/sys: kern sys
Jamie Gritton
jamie at FreeBSD.org
Sat Apr 30 03:19:08 UTC 2016
Author: jamie
Date: Sat Apr 30 03:19:07 2016
New Revision: 298833
URL: https://svnweb.freebsd.org/changeset/base/298833
Log:
MFC r298565:
Add a new jail OSD method, PR_METHOD_REMOVE. It's called when a jail is
removed from the user perspective, i.e. when the last pr_uref goes away,
even though the jail mail still exist in the dying state. It will also
be called if either PR_METHOD_CREATE or PR_METHOD_SET fail.
MFC r298683:
Delay removing the last jail reference in prison_proc_free, and instead
put it off into the pr_task. This is similar to prison_free, and in fact
uses the same task even though they do something slightly different.
MFC r298566:
Pass the current/new jail to PR_METHOD_CHECK, which pushes the call
until after the jail is found or created. This requires unlocking the
jail for the call and re-locking it afterward, but that works because
nothing in the jail has been changed yet, and other processes won't
change the important fields as long as allprison_lock remains held.
Keep better track of name vs namelc in kern_jail_set. Name should
always be the hierarchical name (relative to the caller), and namelc
the last component.
MFC r298668:
Use crcopysafe in jail_attach.
PR: 48471
Modified:
stable/10/sys/kern/kern_jail.c
stable/10/sys/sys/jail.h
Directory Properties:
stable/10/ (props changed)
Modified: stable/10/sys/kern/kern_jail.c
==============================================================================
--- stable/10/sys/kern/kern_jail.c Sat Apr 30 03:05:36 2016 (r298832)
+++ stable/10/sys/kern/kern_jail.c Sat Apr 30 03:19:07 2016 (r298833)
@@ -560,8 +560,9 @@ kern_jail_set(struct thread *td, struct
void *op;
#endif
unsigned long hid;
- size_t namelen, onamelen;
- int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos;
+ size_t namelen, onamelen, pnamelen;
+ int born, created, cuflags, descend, enforce;
+ int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
int fi, jid, jsys, len, level;
int childmax, osreldt, rsnum, slevel;
@@ -584,7 +585,7 @@ kern_jail_set(struct thread *td, struct
error = priv_check(td, PRIV_JAIL_ATTACH);
if (error)
return (error);
- mypr = ppr = td->td_ucred->cr_prison;
+ mypr = td->td_ucred->cr_prison;
if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
return (EPERM);
if (flags & ~JAIL_SET_MASK)
@@ -611,6 +612,13 @@ kern_jail_set(struct thread *td, struct
#endif
g_path = NULL;
+ cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
+ if (!cuflags) {
+ error = EINVAL;
+ vfs_opterror(opts, "no valid operation (create or update)");
+ goto done_errmsg;
+ }
+
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
if (error == ENOENT)
jid = 0;
@@ -1020,42 +1028,18 @@ kern_jail_set(struct thread *td, struct
}
/*
- * Grab the allprison lock before letting modules check their
- * parameters. Once we have it, do not let go so we'll have a
- * consistent view of the OSD list.
- */
- sx_xlock(&allprison_lock);
- error = osd_jail_call(NULL, PR_METHOD_CHECK, opts);
- if (error)
- goto done_unlock_list;
-
- /* By now, all parameters should have been noted. */
- TAILQ_FOREACH(opt, opts, link) {
- if (!opt->seen && strcmp(opt->name, "errmsg")) {
- error = EINVAL;
- vfs_opterror(opts, "unknown parameter: %s", opt->name);
- goto done_unlock_list;
- }
- }
-
- /*
- * See if we are creating a new record or updating an existing one.
+ * Find the specified jail, or at least its parent.
* This abuses the file error codes ENOENT and EEXIST.
*/
- cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
- if (!cuflags) {
- error = EINVAL;
- vfs_opterror(opts, "no valid operation (create or update)");
- goto done_unlock_list;
- }
pr = NULL;
- namelc = NULL;
+ ppr = mypr;
if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
namelc = strrchr(name, '.');
jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
if (*p != '\0')
jid = 0;
}
+ sx_xlock(&allprison_lock);
if (jid != 0) {
/*
* See if a requested jid already exists. There is an
@@ -1121,6 +1105,7 @@ kern_jail_set(struct thread *td, struct
* and updates keyed by the name itself (where the name must exist
* because that is the jail being updated).
*/
+ namelc = NULL;
if (name != NULL) {
namelc = strrchr(name, '.');
if (namelc == NULL)
@@ -1131,7 +1116,6 @@ kern_jail_set(struct thread *td, struct
* parent and child names, and make sure the parent
* exists or matches an already found jail.
*/
- *namelc = '\0';
if (pr != NULL) {
if (strncmp(name, ppr->pr_name, namelc - name)
|| ppr->pr_name[namelc - name] != '\0') {
@@ -1142,6 +1126,7 @@ kern_jail_set(struct thread *td, struct
goto done_unlock_list;
}
} else {
+ *namelc = '\0';
ppr = prison_find_name(mypr, name);
if (ppr == NULL) {
error = ENOENT;
@@ -1150,17 +1135,18 @@ kern_jail_set(struct thread *td, struct
goto done_unlock_list;
}
mtx_unlock(&ppr->pr_mtx);
+ *namelc = '.';
}
- name = ++namelc;
+ namelc++;
}
- if (name[0] != '\0') {
- namelen =
+ if (namelc[0] != '\0') {
+ pnamelen =
(ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
name_again:
deadpr = NULL;
FOREACH_PRISON_CHILD(ppr, tpr) {
if (tpr != pr && tpr->pr_ref > 0 &&
- !strcmp(tpr->pr_name + namelen, name)) {
+ !strcmp(tpr->pr_name + pnamelen, namelc)) {
if (pr == NULL &&
cuflags != JAIL_CREATE) {
mtx_lock(&tpr->pr_mtx);
@@ -1237,7 +1223,8 @@ kern_jail_set(struct thread *td, struct
if (ppr->pr_ref == 0) {
mtx_unlock(&ppr->pr_mtx);
error = ENOENT;
- vfs_opterror(opts, "parent jail went away!");
+ vfs_opterror(opts, "jail \"%s\" not found",
+ prison_name(mypr, ppr));
goto done_unlock_list;
}
ppr->pr_ref++;
@@ -1291,8 +1278,8 @@ kern_jail_set(struct thread *td, struct
pr->pr_id = jid;
/* Set some default values, and inherit some from the parent. */
- if (name == NULL)
- name = "";
+ if (namelc == NULL)
+ namelc = "";
if (path == NULL) {
path = "/";
root = mypr->pr_root;
@@ -1355,6 +1342,7 @@ kern_jail_set(struct thread *td, struct
LIST_INIT(&pr->pr_children);
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
+ TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
#ifdef VIMAGE
/* Allocate a new vnet if specified. */
@@ -1374,7 +1362,7 @@ kern_jail_set(struct thread *td, struct
mtx_lock(&pr->pr_mtx);
/*
* New prisons do not yet have a reference, because we do not
- * want other to see the incomplete prison once the
+ * want others to see the incomplete prison once the
* allprison_lock is downgraded.
*/
} else {
@@ -1588,13 +1576,13 @@ kern_jail_set(struct thread *td, struct
}
#endif
onamelen = namelen = 0;
- if (name != NULL) {
+ if (namelc != NULL) {
/* Give a default name of the jid. Also allow the name to be
* explicitly the jid - but not any other number, and only in
* normal form (no leading zero/etc).
*/
- if (name[0] == '\0')
- snprintf(name = numbuf, sizeof(numbuf), "%d", jid);
+ if (namelc[0] == '\0')
+ snprintf(namelc = numbuf, sizeof(numbuf), "%d", jid);
else if ((strtoul(namelc, &p, 10) != jid ||
namelc[0] < '1' || namelc[0] > '9') && *p == '\0') {
error = EINVAL;
@@ -1606,9 +1594,10 @@ kern_jail_set(struct thread *td, struct
* Make sure the name isn't too long for the prison or its
* children.
*/
- onamelen = strlen(pr->pr_name);
- namelen = strlen(name);
- if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) {
+ pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
+ onamelen = strlen(pr->pr_name + pnamelen);
+ namelen = strlen(namelc);
+ if (pnamelen + namelen + 1 > sizeof(pr->pr_name)) {
error = ENAMETOOLONG;
goto done_deref_locked;
}
@@ -1625,6 +1614,30 @@ kern_jail_set(struct thread *td, struct
goto done_deref_locked;
}
+ /*
+ * Let modules check their parameters. This requires unlocking and
+ * then re-locking the prison, but this is still a valid state as long
+ * as allprison_lock remains xlocked.
+ */
+ mtx_unlock(&pr->pr_mtx);
+ error = osd_jail_call(pr, PR_METHOD_CHECK, opts);
+ if (error != 0) {
+ prison_deref(pr, created
+ ? PD_LIST_XLOCKED
+ : PD_DEREF | PD_LIST_XLOCKED);
+ goto done_releroot;
+ }
+ mtx_lock(&pr->pr_mtx);
+
+ /* At this point, all valid parameters should have been noted. */
+ TAILQ_FOREACH(opt, opts, link) {
+ if (!opt->seen && strcmp(opt->name, "errmsg")) {
+ error = EINVAL;
+ vfs_opterror(opts, "unknown parameter: %s", opt->name);
+ goto done_deref_locked;
+ }
+ }
+
/* Set the parameters of the prison. */
#ifdef INET
redo_ip4 = 0;
@@ -1698,12 +1711,12 @@ kern_jail_set(struct thread *td, struct
FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
tpr->pr_devfs_rsnum = rsnum;
}
- if (name != NULL) {
+ if (namelc != NULL) {
if (ppr == &prison0)
- strlcpy(pr->pr_name, name, sizeof(pr->pr_name));
+ strlcpy(pr->pr_name, namelc, sizeof(pr->pr_name));
else
snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
- ppr->pr_name, name);
+ ppr->pr_name, namelc);
/* Change this component of child names. */
FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
@@ -1781,6 +1794,7 @@ kern_jail_set(struct thread *td, struct
* for now, so new ones will remain unseen until after the module
* handlers have completed.
*/
+ born = pr->pr_uref == 0;
if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
if (pr_flags & PR_PERSIST) {
pr->pr_ref++;
@@ -1850,15 +1864,20 @@ kern_jail_set(struct thread *td, struct
/* Let the modules do their work. */
sx_downgrade(&allprison_lock);
- if (created) {
+ if (born) {
error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
if (error) {
- prison_deref(pr, PD_LIST_SLOCKED);
+ (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ prison_deref(pr, created
+ ? PD_LIST_SLOCKED
+ : PD_DEREF | PD_LIST_SLOCKED);
goto done_errmsg;
}
}
error = osd_jail_call(pr, PR_METHOD_SET, opts);
if (error) {
+ if (born)
+ (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
prison_deref(pr, created
? PD_LIST_SLOCKED
: PD_DEREF | PD_LIST_SLOCKED);
@@ -1910,7 +1929,7 @@ kern_jail_set(struct thread *td, struct
sx_sunlock(&allprison_lock);
}
- goto done_errmsg;
+ goto done_free;
done_deref_locked:
prison_deref(pr, created
@@ -2404,7 +2423,6 @@ sys_jail_attach(struct thread *td, struc
static int
do_jail_attach(struct thread *td, struct prison *pr)
{
- struct prison *ppr;
struct proc *p;
struct ucred *newcred, *oldcred;
int error;
@@ -2432,7 +2450,6 @@ do_jail_attach(struct thread *td, struct
/*
* Reparent the newly attached process to this jail.
*/
- ppr = td->td_ucred->cr_prison;
p = td->td_proc;
error = cpuset_setproc_update_set(p, pr->pr_cpuset);
if (error)
@@ -2451,23 +2468,23 @@ do_jail_attach(struct thread *td, struct
newcred = crget();
PROC_LOCK(p);
- oldcred = p->p_ucred;
- setsugid(p);
- crcopy(newcred, oldcred);
+ oldcred = crcopysafe(p, newcred);
newcred->cr_prison = pr;
p->p_ucred = newcred;
+ setsugid(p);
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
#endif
+ prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
crfree(oldcred);
- prison_deref(ppr, PD_DEREF | PD_DEUREF);
return (0);
+
e_unlock:
VOP_UNLOCK(pr->pr_root, 0);
e_revert_osd:
/* Tell modules this thread is still in its old jail after all. */
- (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
+ (void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
prison_deref(pr, PD_DEREF | PD_DEUREF);
return (error);
}
@@ -2576,16 +2593,13 @@ prison_allow(struct ucred *cred, unsigne
void
prison_free_locked(struct prison *pr)
{
+ int ref;
mtx_assert(&pr->pr_mtx, MA_OWNED);
- pr->pr_ref--;
- if (pr->pr_ref == 0) {
- mtx_unlock(&pr->pr_mtx);
- TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
- taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
- return;
- }
+ ref = --pr->pr_ref;
mtx_unlock(&pr->pr_mtx);
+ if (ref == 0)
+ taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
void
@@ -2596,11 +2610,17 @@ prison_free(struct prison *pr)
prison_free_locked(pr);
}
+/*
+ * Complete a call to either prison_free or prison_proc_free.
+ */
static void
prison_complete(void *context, int pending)
{
+ struct prison *pr = context;
- prison_deref((struct prison *)context, 0);
+ mtx_lock(&pr->pr_mtx);
+ prison_deref(pr, pr->pr_uref
+ ? PD_DEREF | PD_DEUREF | PD_LOCKED : PD_LOCKED);
}
/*
@@ -2613,19 +2633,53 @@ static void
prison_deref(struct prison *pr, int flags)
{
struct prison *ppr, *tpr;
+ int ref, lasturef;
if (!(flags & PD_LOCKED))
mtx_lock(&pr->pr_mtx);
for (;;) {
if (flags & PD_DEUREF) {
+ KASSERT(pr->pr_uref > 0,
+ ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
+ pr->pr_id));
pr->pr_uref--;
+ lasturef = pr->pr_uref == 0;
+ if (lasturef)
+ pr->pr_ref++;
KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
- }
- if (flags & PD_DEREF)
+ } else
+ lasturef = 0;
+ if (flags & PD_DEREF) {
+ KASSERT(pr->pr_ref > 0,
+ ("prison_deref PD_DEREF on a dead prison (jid=%d)",
+ pr->pr_id));
pr->pr_ref--;
- /* If the prison still has references, nothing else to do. */
- if (pr->pr_ref > 0) {
+ }
+ ref = pr->pr_ref;
+ mtx_unlock(&pr->pr_mtx);
+
+ /*
+ * Tell the modules if the last user reference was removed
+ * (even it sticks around in dying state).
+ */
+ if (lasturef) {
+ if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
+ if (ref > 1) {
+ sx_slock(&allprison_lock);
+ flags |= PD_LIST_SLOCKED;
+ } else {
+ sx_xlock(&allprison_lock);
+ flags |= PD_LIST_XLOCKED;
+ }
+ }
+ (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ mtx_lock(&pr->pr_mtx);
+ ref = --pr->pr_ref;
mtx_unlock(&pr->pr_mtx);
+ }
+
+ /* If the prison still has references, nothing else to do. */
+ if (ref > 0) {
if (flags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
else if (flags & PD_LIST_XLOCKED)
@@ -2633,7 +2687,6 @@ prison_deref(struct prison *pr, int flag
return;
}
- mtx_unlock(&pr->pr_mtx);
if (flags & PD_LIST_SLOCKED) {
if (!sx_try_upgrade(&allprison_lock)) {
sx_sunlock(&allprison_lock);
@@ -2715,7 +2768,20 @@ prison_proc_free(struct prison *pr)
mtx_lock(&pr->pr_mtx);
KASSERT(pr->pr_uref > 0,
("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
- prison_deref(pr, PD_DEUREF | PD_LOCKED);
+ if (pr->pr_uref > 1)
+ pr->pr_uref--;
+ else {
+ /*
+ * Don't remove the last user reference in this context, which
+ * is expected to be a process that is not only locked, but
+ * also half dead.
+ */
+ pr->pr_ref++;
+ mtx_unlock(&pr->pr_mtx);
+ taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
+ return;
+ }
+ mtx_unlock(&pr->pr_mtx);
}
Modified: stable/10/sys/sys/jail.h
==============================================================================
--- stable/10/sys/sys/jail.h Sat Apr 30 03:05:36 2016 (r298832)
+++ stable/10/sys/sys/jail.h Sat Apr 30 03:19:07 2016 (r298833)
@@ -149,7 +149,6 @@ struct prison_racct;
* (p) locked by pr_mtx
* (c) set only during creation before the structure is shared, no mutex
* required to read
- * (d) set only during destruction of jail, no mutex needed
*/
struct prison {
TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */
@@ -161,7 +160,7 @@ struct prison {
LIST_ENTRY(prison) pr_sibling; /* (a) next in parent's list */
struct prison *pr_parent; /* (c) containing jail */
struct mtx pr_mtx;
- struct task pr_task; /* (d) destroy task */
+ struct task pr_task; /* (c) destroy task */
struct osd pr_osd; /* (p) additional data */
struct cpuset *pr_cpuset; /* (p) cpuset */
struct vnet *pr_vnet; /* (c) network stack */
@@ -243,7 +242,8 @@ struct prison_racct {
#define PR_METHOD_SET 2
#define PR_METHOD_CHECK 3
#define PR_METHOD_ATTACH 4
-#define PR_MAXMETHOD 5
+#define PR_METHOD_REMOVE 5
+#define PR_MAXMETHOD 6
/*
* Lock/unlock a prison.
More information about the svn-src-all
mailing list