svn commit: r354484 - head/sys/netinet
Gleb Smirnoff
glebius at FreeBSD.org
Thu Nov 7 21:27:34 UTC 2019
Author: glebius
Date: Thu Nov 7 21:27:32 2019
New Revision: 354484
URL: https://svnweb.freebsd.org/changeset/base/354484
Log:
Now that there is no R/W lock on the PCB list, the pcblist sysctl
handlers can be greatly simplified. All the previous double
cycling and complex locking was added to keep these functions from
holding global PCB locks for an extended period of time, which would
prevent the addition of new entries.
Modified:
head/sys/netinet/ip_divert.c
head/sys/netinet/raw_ip.c
head/sys/netinet/tcp_subr.c
head/sys/netinet/tcp_syncache.c
head/sys/netinet/tcp_syncache.h
head/sys/netinet/udp_usrreq.c
Modified: head/sys/netinet/ip_divert.c
==============================================================================
--- head/sys/netinet/ip_divert.c Thu Nov 7 21:23:07 2019 (r354483)
+++ head/sys/netinet/ip_divert.c Thu Nov 7 21:27:32 2019 (r354484)
@@ -629,71 +629,41 @@ div_ctlinput(int cmd, struct sockaddr *sa, void *vip)
static int
div_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ int error;
- /*
- * The process of preparing the TCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
+ if (req->newptr != 0)
+ return EPERM;
+
if (req->oldptr == 0) {
+ int n;
+
n = V_divcbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return 0;
}
- if (req->newptr != 0)
- return EPERM;
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_INFO_WLOCK(&V_divcbinfo);
- gencnt = V_divcbinfo.ipi_gencnt;
- n = V_divcbinfo.ipi_count;
- INP_INFO_WUNLOCK(&V_divcbinfo);
-
- error = sysctl_wire_old_buffer(req,
- 2 * sizeof(xig) + n*sizeof(struct xinpcb));
- if (error != 0)
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_divcbinfo.ipi_count;
+ xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return error;
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == NULL)
- return ENOMEM;
-
- INP_INFO_RLOCK_ET(&V_divcbinfo, et);
- for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt &&
- cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ if (inp->inp_gencnt <= xig.xig_gen) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
@@ -702,17 +672,9 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_divcbinfo);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_divcbinfo);
+ NET_EPOCH_EXIT(et);
if (!error) {
- struct epoch_tracker et;
/*
* Give the user an updated idea of our state.
* If the generation differs from what we told
@@ -720,15 +682,13 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_INFO_RLOCK_ET(&V_divcbinfo, et);
xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_divcbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
- return error;
+
+ return (error);
}
#ifdef SYSCTL_NODE
Modified: head/sys/netinet/raw_ip.c
==============================================================================
--- head/sys/netinet/raw_ip.c Thu Nov 7 21:23:07 2019 (r354483)
+++ head/sys/netinet/raw_ip.c Thu Nov 7 21:27:32 2019 (r354484)
@@ -1067,97 +1067,67 @@ rip_send(struct socket *so, int flags, struct mbuf *m,
static int
rip_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ int error;
- /*
- * The process of preparing the TCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
+ if (req->newptr != 0)
+ return (EPERM);
+
if (req->oldptr == 0) {
+ int n;
+
n = V_ripcbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
}
- if (req->newptr != 0)
- return (EPERM);
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
+ return (error);
- /*
- * OK, now we're committed to doing something.
- */
- INP_INFO_WLOCK(&V_ripcbinfo);
- gencnt = V_ripcbinfo.ipi_gencnt;
- n = V_ripcbinfo.ipi_count;
- INP_INFO_WUNLOCK(&V_ripcbinfo);
-
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_ripcbinfo.ipi_count;
+ xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
-
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
- for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt &&
- cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ if (inp->inp_gencnt <= xig.xig_gen &&
+ cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
+ if (error)
+ break;
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_ripcbinfo);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_ripcbinfo);
+ NET_EPOCH_EXIT(et);
if (!error) {
- struct epoch_tracker et;
/*
* Give the user an updated idea of our state. If the
* generation differs from what we told her before, she knows
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_ripcbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
+
return (error);
}
Modified: head/sys/netinet/tcp_subr.c
==============================================================================
--- head/sys/netinet/tcp_subr.c Thu Nov 7 21:23:07 2019 (r354483)
+++ head/sys/netinet/tcp_subr.c Thu Nov 7 21:27:32 2019 (r354484)
@@ -2127,17 +2127,17 @@ tcp_notify(struct inpcb *inp, int error)
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, m, n, pcb_count;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
- struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ struct xinpgen xig;
+ int error;
- /*
- * The process of preparing the TCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
+ if (req->newptr != NULL)
+ return (EPERM);
+
if (req->oldptr == NULL) {
+ int n;
+
n = V_tcbinfo.ipi_count +
counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
n += imax(n / 8, 10);
@@ -2145,44 +2145,29 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
return (0);
}
- if (req->newptr != NULL)
- return (EPERM);
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_LIST_RLOCK(&V_tcbinfo);
- gencnt = V_tcbinfo.ipi_gencnt;
- n = V_tcbinfo.ipi_count;
- INP_LIST_RUNLOCK(&V_tcbinfo);
-
- m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
-
- error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
- + (n + m) * sizeof(struct xtcpcb));
- if (error != 0)
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n + m;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_tcbinfo.ipi_count +
+ counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
+ xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- error = syncache_pcblist(req, m, &pcb_count);
+ error = syncache_pcblist(req);
if (error)
return (error);
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
-
- INP_INFO_WLOCK(&V_tcbinfo);
- for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
- inp != NULL && i < n; inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
+ INP_RLOCK(inp);
+ if (inp->inp_gencnt <= xig.xig_gen) {
/*
* XXX: This use of cr_cansee(), introduced with
* TCP state changes, is not quite right, but for
@@ -2197,36 +2182,18 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
} else
error = cr_canseeinpcb(req->td->td_ucred, inp);
if (error == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_tcbinfo);
- n = i;
+ struct xtcpcb xt;
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
- struct xtcpcb xt;
-
- tcp_inptoxtp(inp, &xt);
- INP_RUNLOCK(inp);
- error = SYSCTL_OUT(req, &xt, sizeof xt);
+ tcp_inptoxtp(inp, &xt);
+ INP_RUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xt, sizeof xt);
+ if (error)
+ break;
+ }
} else
INP_RUNLOCK(inp);
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
if (!error) {
/*
@@ -2236,14 +2203,13 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_LIST_RLOCK(&V_tcbinfo);
xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
- xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
- INP_LIST_RUNLOCK(&V_tcbinfo);
+ xig.xig_count = V_tcbinfo.ipi_count +
+ counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
+
return (error);
}
Modified: head/sys/netinet/tcp_syncache.c
==============================================================================
--- head/sys/netinet/tcp_syncache.c Thu Nov 7 21:23:07 2019 (r354483)
+++ head/sys/netinet/tcp_syncache.c Thu Nov 7 21:27:32 2019 (r354484)
@@ -2452,46 +2452,41 @@ syncache_unpause(void *arg)
* amount of space the caller allocated for this function to use.
*/
int
-syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
+syncache_pcblist(struct sysctl_req *req)
{
struct xtcpcb xt;
struct syncache *sc;
struct syncache_head *sch;
- int count, error, i;
+ int error, i;
- for (count = 0, error = 0, i = 0; i < V_tcp_syncache.hashsize; i++) {
+ bzero(&xt, sizeof(xt));
+ xt.xt_len = sizeof(xt);
+ xt.t_state = TCPS_SYN_RECEIVED;
+ xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
+ xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket);
+ xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
+ xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
+
+ for (i = 0; i < V_tcp_syncache.hashsize; i++) {
sch = &V_tcp_syncache.hashbase[i];
SCH_LOCK(sch);
TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
- if (count >= max_pcbs) {
- SCH_UNLOCK(sch);
- goto exit;
- }
if (cr_cansee(req->td->td_ucred, sc->sc_cred) != 0)
continue;
- bzero(&xt, sizeof(xt));
- xt.xt_len = sizeof(xt);
if (sc->sc_inc.inc_flags & INC_ISIPV6)
xt.xt_inp.inp_vflag = INP_IPV6;
else
xt.xt_inp.inp_vflag = INP_IPV4;
bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc,
sizeof (struct in_conninfo));
- xt.t_state = TCPS_SYN_RECEIVED;
- xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
- xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket);
- xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
- xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
error = SYSCTL_OUT(req, &xt, sizeof xt);
if (error) {
SCH_UNLOCK(sch);
- goto exit;
+ return (0);
}
- count++;
}
SCH_UNLOCK(sch);
}
-exit:
- *pcbs_exported = count;
- return error;
+
+ return (0);
}
Modified: head/sys/netinet/tcp_syncache.h
==============================================================================
--- head/sys/netinet/tcp_syncache.h Thu Nov 7 21:23:07 2019 (r354483)
+++ head/sys/netinet/tcp_syncache.h Thu Nov 7 21:27:32 2019 (r354484)
@@ -48,7 +48,7 @@ int syncache_add(struct in_conninfo *, struct tcpopt
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *);
void syncache_badack(struct in_conninfo *);
-int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
+int syncache_pcblist(struct sysctl_req *);
struct syncache {
TAILQ_ENTRY(syncache) sc_hash;
Modified: head/sys/netinet/udp_usrreq.c
==============================================================================
--- head/sys/netinet/udp_usrreq.c Thu Nov 7 21:23:07 2019 (r354483)
+++ head/sys/netinet/udp_usrreq.c Thu Nov 7 21:27:32 2019 (r354484)
@@ -851,87 +851,53 @@ udplite_ctlinput(int cmd, struct sockaddr *sa, void *v
static int
udp_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ int error;
- /*
- * The process of preparing the PCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
+ if (req->newptr != 0)
+ return (EPERM);
+
if (req->oldptr == 0) {
+ int n;
+
n = V_udbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
}
- if (req->newptr != 0)
- return (EPERM);
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_INFO_RLOCK_ET(&V_udbinfo, et);
- gencnt = V_udbinfo.ipi_gencnt;
- n = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
-
- error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
- + n * sizeof(struct xinpcb));
- if (error != 0)
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_udbinfo.ipi_count;
+ xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == NULL)
- return (ENOMEM);
-
- INP_INFO_RLOCK_ET(&V_udbinfo, et);
- for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt &&
- cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ if (inp->inp_gencnt <= xig.xig_gen &&
+ cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
+ if (error)
+ break;
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_udbinfo);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_udbinfo);
+ NET_EPOCH_EXIT(et);
if (!error) {
/*
@@ -940,14 +906,12 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK_ET(&V_udbinfo, et);
xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
+
return (error);
}
More information about the svn-src-all
mailing list