git: 81a34d374ed6 - main - protosw: retire pr_drain and use EVENTHANDLER(9) directly
Date: Wed, 17 Aug 2022 18:52:21 UTC
The branch main has been updated by glebius: URL: https://cgit.FreeBSD.org/src/commit/?id=81a34d374ed6e5a7b14f24583bc8e3abfdc66306 commit 81a34d374ed6e5a7b14f24583bc8e3abfdc66306 Author: Gleb Smirnoff <glebius@FreeBSD.org> AuthorDate: 2022-08-17 18:50:31 +0000 Commit: Gleb Smirnoff <glebius@FreeBSD.org> CommitDate: 2022-08-17 18:50:31 +0000 protosw: retire pr_drain and use EVENTHANDLER(9) directly The method was called for two different conditions: 1) the VM layer is low on pages or 2) one of the UMA zones of the mbuf allocator is exhausted. This change splits 2) into a new event handler, but all affected network subsystems are modified to subscribe to both, so this change shall not bring functional changes under different low memory situations. There were three subsystems still using pr_drain: TCP, SCTP and frag6. The latter had its protosw entry for the only reason to register its pr_drain method. Reviewed by: tuexen, melifaro Differential revision: https://reviews.freebsd.org/D36164 --- sys/kern/kern_mbuf.c | 28 ++------------------------- sys/kern/uipc_debug.c | 3 --- sys/kern/uipc_domain.c | 1 - sys/netinet/in_proto.c | 11 ----------- sys/netinet/ip_input.c | 15 --------------- sys/netinet/ip_reass.c | 48 ++++++++++++++++++++++++++++------------------- sys/netinet/ip_var.h | 1 - sys/netinet/sctp_module.c | 6 ------ sys/netinet/sctp_pcb.c | 11 +++++++++-- sys/netinet/sctp_pcb.h | 2 -- sys/netinet/sctp_var.h | 1 - sys/netinet/tcp_subr.c | 9 ++++++++- sys/netinet/tcp_var.h | 1 - sys/netinet6/in6_proto.c | 15 --------------- sys/netinet6/ip6_input.c | 4 ++++ sys/sys/eventhandler.h | 2 ++ sys/sys/protosw.h | 5 ----- sys/vm/vm_pageout.h | 1 + 18 files changed, 55 insertions(+), 109 deletions(-) diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index 2e307975b9ca..1c0c5624b6d7 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -39,14 +39,12 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/systm.h> #include <sys/mbuf.h> -#include <sys/domain.h> #include 
<sys/eventhandler.h> #include <sys/kernel.h> #include <sys/ktls.h> #include <sys/limits.h> #include <sys/lock.h> #include <sys/mutex.h> -#include <sys/protosw.h> #include <sys/refcount.h> #include <sys/sf_buf.h> #include <sys/smp.h> @@ -396,14 +394,6 @@ mbuf_init(void *dummy) uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); - /* - * Hook event handler for low-memory situation, used to - * drain protocols and push data back to the caches (UMA - * later pushes it back to VM). - */ - EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, - EVENTHANDLER_PRI_FIRST); - snd_tag_count = counter_u64_alloc(M_WAITOK); } SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); @@ -828,26 +818,12 @@ mb_ctor_pack(void *mem, int size, void *arg, int how) /* * This is the protocol drain routine. Called by UMA whenever any of the * mbuf zones is closed to its limit. - * - * No locks should be held when this is called. The drain routines have to - * presently acquire some locks which raises the possibility of lock order - * reversal. 
*/ static void mb_reclaim(uma_zone_t zone __unused, int pending __unused) { - struct epoch_tracker et; - struct domain *dp; - struct protosw *pr; - - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__); - - NET_EPOCH_ENTER(et); - for (dp = domains; dp != NULL; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) - if (pr->pr_drain != NULL) - (*pr->pr_drain)(); - NET_EPOCH_EXIT(et); + + EVENTHANDLER_INVOKE(mbuf_lowmem, VM_LOW_MBUFS); } /* diff --git a/sys/kern/uipc_debug.c b/sys/kern/uipc_debug.c index 5f96850431a0..3f54e3e46f26 100644 --- a/sys/kern/uipc_debug.c +++ b/sys/kern/uipc_debug.c @@ -315,9 +315,6 @@ db_print_protosw(struct protosw *pr, const char *prname, int indent) db_print_indent(indent); db_printf("pr_ctloutput: %p ", pr->pr_ctloutput); - - db_print_indent(indent); - db_printf("pr_drain: %p\n", pr->pr_drain); } static void diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c index 20e7c87a6c20..c6a79d34beb2 100644 --- a/sys/kern/uipc_domain.c +++ b/sys/kern/uipc_domain.c @@ -435,7 +435,6 @@ pf_proto_unregister(int family, int protocol, int type) dpr->pr_protocol = PROTO_SPACER; dpr->pr_flags = 0; dpr->pr_ctloutput = NULL; - dpr->pr_drain = NULL; dpr->pr_usrreqs = &nousrreqs; /* Job is done, not more protection required. 
*/ diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 9b5f41976197..cac885560a30 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -108,14 +108,6 @@ extern struct domain inetdomain; } struct protosw inetsw[] = { -{ - .pr_type = 0, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IP, - .pr_flags = PR_CAPATTACH, - .pr_drain = ip_drain, - .pr_usrreqs = &nousrreqs -}, { .pr_type = SOCK_DGRAM, .pr_domain = &inetdomain, @@ -131,7 +123,6 @@ struct protosw inetsw[] = { .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD| PR_CAPATTACH, .pr_ctloutput = tcp_ctloutput, - .pr_drain = tcp_drain, .pr_usrreqs = &tcp_usrreqs }, #ifdef SCTP @@ -141,7 +132,6 @@ struct protosw inetsw[] = { .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = sctp_drain, .pr_usrreqs = &sctp_usrreqs }, { @@ -150,7 +140,6 @@ struct protosw inetsw[] = { .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = NULL, /* Covered by the SOCK_SEQPACKET entry. */ .pr_usrreqs = &sctp_usrreqs }, #endif /* SCTP */ diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index ca451ef48649..e17d6fccb202 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -101,7 +101,6 @@ CTASSERT(sizeof(struct ip) == 20); /* IP reassembly functions are defined in ip_reass.c. 
*/ extern void ipreass_init(void); -extern void ipreass_drain(void); #ifdef VIMAGE extern void ipreass_destroy(void); #endif @@ -845,20 +844,6 @@ bad: m_freem(m); } -void -ip_drain(void) -{ - VNET_ITERATOR_DECL(vnet_iter); - - VNET_LIST_RLOCK_NOSLEEP(); - VNET_FOREACH(vnet_iter) { - CURVNET_SET(vnet_iter); - ipreass_drain(); - CURVNET_RESTORE(); - } - VNET_LIST_RUNLOCK_NOSLEEP(); -} - int ipproto_register(uint8_t proto, ipproto_input_t input, ipproto_ctlinput_t ctl) { diff --git a/sys/netinet/ip_reass.c b/sys/netinet/ip_reass.c index b436d6282206..a0a8dd42b758 100644 --- a/sys/netinet/ip_reass.c +++ b/sys/netinet/ip_reass.c @@ -92,7 +92,6 @@ VNET_DEFINE_STATIC(int, ipreass_maxbucketsize); #define V_ipreass_maxbucketsize VNET(ipreass_maxbucketsize) void ipreass_init(void); -void ipreass_drain(void); #ifdef VIMAGE void ipreass_destroy(void); #endif @@ -597,6 +596,31 @@ ipreass_timer_init(void *arg __unused) } SYSINIT(ipreass, SI_SUB_VNET_DONE, SI_ORDER_ANY, ipreass_timer_init, NULL); +/* + * Drain off all datagram fragments. + */ +static void +ipreass_drain(void) +{ + VNET_ITERATOR_DECL(vnet_iter); + + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + for (int i = 0; i < IPREASS_NHASH; i++) { + IPQ_LOCK(i); + while(!TAILQ_EMPTY(&V_ipq[i].head)) + ipq_drop(&V_ipq[i], + TAILQ_FIRST(&V_ipq[i].head)); + KASSERT(V_ipq[i].count == 0, + ("%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i, + V_ipq[i].count, V_ipq)); + IPQ_UNLOCK(i); + } + CURVNET_RESTORE(); + } +} + + /* * Initialize IP reassembly structures. */ @@ -623,24 +647,10 @@ ipreass_init(void) maxfrags = IP_MAXFRAGS; EVENTHANDLER_REGISTER(nmbclusters_change, ipreass_zone_change, NULL, EVENTHANDLER_PRI_ANY); - } -} - -/* - * Drain off all datagram fragments. 
- */ -void -ipreass_drain(void) -{ - - for (int i = 0; i < IPREASS_NHASH; i++) { - IPQ_LOCK(i); - while(!TAILQ_EMPTY(&V_ipq[i].head)) - ipq_drop(&V_ipq[i], TAILQ_FIRST(&V_ipq[i].head)); - KASSERT(V_ipq[i].count == 0, - ("%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i, - V_ipq[i].count, V_ipq)); - IPQ_UNLOCK(i); + EVENTHANDLER_REGISTER(vm_lowmem, ipreass_drain, NULL, + LOWMEM_PRI_DEFAULT); + EVENTHANDLER_REGISTER(mbuf_lowmem, ipreass_drain, NULL, + LOWMEM_PRI_DEFAULT); } } diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 8711e0291379..7701b64c1be0 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -214,7 +214,6 @@ int inp_getmoptions(struct inpcb *, struct sockopt *); int inp_setmoptions(struct inpcb *, struct sockopt *); int ip_ctloutput(struct socket *, struct sockopt *sopt); -void ip_drain(void); int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags); void ip_forward(struct mbuf *m, int srcrt); diff --git a/sys/netinet/sctp_module.c b/sys/netinet/sctp_module.c index ea49b74343e3..ba0d585bd541 100644 --- a/sys/netinet/sctp_module.c +++ b/sys/netinet/sctp_module.c @@ -61,7 +61,6 @@ struct protosw sctp_stream_protosw = { .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = sctp_drain, .pr_usrreqs = &sctp_usrreqs, }; @@ -71,7 +70,6 @@ struct protosw sctp_seqpacket_protosw = { .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = sctp_drain, .pr_usrreqs = &sctp_usrreqs, }; #endif @@ -85,7 +83,6 @@ struct protosw sctp6_stream_protosw = { .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = sctp_drain, .pr_usrreqs = &sctp6_usrreqs, }; @@ -95,9 +92,6 @@ struct protosw sctp6_seqpacket_protosw = { .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, -#ifndef INET /* Do not call initialization and drain routines 
twice. */ - .pr_drain = sctp_drain, -#endif .pr_usrreqs = &sctp6_usrreqs, }; #endif diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index 73c550b86d65..0fb92e7408f4 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -6942,15 +6942,18 @@ sctp_drain_mbufs(struct sctp_tcb *stcb) */ } -void +static void sctp_drain(void) { + struct epoch_tracker et; + VNET_ITERATOR_DECL(vnet_iter); + + NET_EPOCH_ENTER(et); /* * We must walk the PCB lists for ALL associations here. The system * is LOW on MBUF's and needs help. This is where reneging will * occur. We really hope this does NOT happen! */ - VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); @@ -6962,6 +6965,7 @@ sctp_drain(void) #ifdef VIMAGE continue; #else + NET_EPOCH_EXIT(et); return; #endif } @@ -6981,7 +6985,10 @@ sctp_drain(void) CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); } +EVENTHANDLER_DEFINE(vm_lowmem, sctp_drain, NULL, LOWMEM_PRI_DEFAULT); +EVENTHANDLER_DEFINE(mbuf_lowmem, sctp_drain, NULL, LOWMEM_PRI_DEFAULT); /* * start a new iterator diff --git a/sys/netinet/sctp_pcb.h b/sys/netinet/sctp_pcb.h index 687ccf6a1c50..fd8115a8101a 100644 --- a/sys/netinet/sctp_pcb.h +++ b/sys/netinet/sctp_pcb.h @@ -611,8 +611,6 @@ sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *, bool sctp_is_vtag_good(uint32_t, uint16_t lport, uint16_t rport, struct timeval *); -/* void sctp_drain(void); */ - int sctp_destination_is_reachable(struct sctp_tcb *, struct sockaddr *); int sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp); diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h index 16beaa7f8b12..3bff09adb367 100644 --- a/sys/netinet/sctp_var.h +++ b/sys/netinet/sctp_var.h @@ -327,7 +327,6 @@ int sctp_ctloutput(struct socket *, struct sockopt *); void sctp_input_with_port(struct mbuf *, int, uint16_t); int sctp_input(struct mbuf **, int *, int); void sctp_pathmtu_adjustment(struct sctp_tcb *, uint32_t, 
bool); -void sctp_drain(void); void sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *, uint8_t, uint8_t, uint16_t, uint32_t); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 70d1d2fb942a..e26fe0ec247e 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1448,6 +1448,8 @@ tcp_vnet_init(void *arg __unused) VNET_SYSINIT(tcp_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_vnet_init, NULL); +static void tcp_drain(void); + static void tcp_init(void *arg __unused) { @@ -1506,6 +1508,8 @@ tcp_init(void *arg __unused) ISN_LOCK_INIT(); EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, SHUTDOWN_PRI_DEFAULT); + EVENTHANDLER_REGISTER(vm_lowmem, tcp_drain, NULL, LOWMEM_PRI_DEFAULT); + EVENTHANDLER_REGISTER(mbuf_lowmem, tcp_drain, NULL, LOWMEM_PRI_DEFAULT); tcp_inp_lro_direct_queue = counter_u64_alloc(M_WAITOK); tcp_inp_lro_wokeup_queue = counter_u64_alloc(M_WAITOK); @@ -2513,14 +2517,16 @@ tcp_close(struct tcpcb *tp) return (tp); } -void +static void tcp_drain(void) { + struct epoch_tracker et; VNET_ITERATOR_DECL(vnet_iter); if (!do_tcpdrain) return; + NET_EPOCH_ENTER(et); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); @@ -2558,6 +2564,7 @@ tcp_drain(void) CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); } /* diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index fa86ab51d68b..62d64f2dbdb2 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1085,7 +1085,6 @@ void tcp_twclose(struct tcptw *, int); void tcp_ctlinput(int, struct sockaddr *, void *); int tcp_ctloutput(struct socket *, struct sockopt *); void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *); -void tcp_drain(void); void tcp_fini(void *); char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, const void *, const void *); diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 52534c579003..963b6a8d9aed 100644 --- a/sys/netinet6/in6_proto.c +++ 
b/sys/netinet6/in6_proto.c @@ -141,14 +141,6 @@ static struct pr_usrreqs nousrreqs; } struct protosw inet6sw[] = { -{ - .pr_type = 0, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_IPV6, - .pr_flags = PR_CAPATTACH, - .pr_drain = frag6_drain, - .pr_usrreqs = &nousrreqs, -}, { .pr_type = SOCK_DGRAM, .pr_domain = &inet6domain, @@ -164,9 +156,6 @@ struct protosw inet6sw[] = { .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD| PR_LISTEN|PR_CAPATTACH, .pr_ctloutput = tcp_ctloutput, -#ifndef INET /* don't call initialization, timeout, and drain routines twice */ - .pr_drain = tcp_drain, -#endif .pr_usrreqs = &tcp6_usrreqs, }, #ifdef SCTP @@ -176,9 +165,6 @@ struct protosw inet6sw[] = { .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, -#ifndef INET /* Do not call initialization and drain routines twice. */ - .pr_drain = sctp_drain, -#endif .pr_usrreqs = &sctp6_usrreqs }, { @@ -187,7 +173,6 @@ struct protosw inet6sw[] = { .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = NULL, /* Covered by the SOCK_SEQPACKET entry. 
*/ .pr_usrreqs = &sctp6_usrreqs }, #endif /* SCTP */ diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 8d8cef359d90..52c70292f920 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -295,6 +295,10 @@ ip6_init(void *arg __unused) IP6PROTO_REGISTER(IPPROTO_SCTP, sctp6_input, sctp6_ctlinput); #endif + EVENTHANDLER_REGISTER(vm_lowmem, frag6_drain, NULL, LOWMEM_PRI_DEFAULT); + EVENTHANDLER_REGISTER(mbuf_lowmem, frag6_drain, NULL, + LOWMEM_PRI_DEFAULT); + netisr_register(&ip6_nh); #ifdef RSS netisr_register(&ip6_direct_nh); diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h index 5d6e75abeda1..8c45431c83c3 100644 --- a/sys/sys/eventhandler.h +++ b/sys/sys/eventhandler.h @@ -205,6 +205,8 @@ EVENTHANDLER_DECLARE(power_suspend_early, power_change_fn); typedef void (*vm_lowmem_handler_t)(void *, int); #define LOWMEM_PRI_DEFAULT EVENTHANDLER_PRI_FIRST EVENTHANDLER_DECLARE(vm_lowmem, vm_lowmem_handler_t); +/* Some of mbuf(9) zones reached maximum */ +EVENTHANDLER_DECLARE(mbuf_lowmem, vm_lowmem_handler_t); /* Root mounted event */ typedef void (*mountroot_handler_t)(void *); diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index 6e46f40c8ad7..2fd7a0b30412 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -52,9 +52,6 @@ struct sockopt; * Each protocol has a handle initializing one of these structures, * which is used for protocol-protocol and system-protocol communication. * - * The system will call the pr_drain entry if it is low on space and - * this should throw away any non-critical data. - * * In retrospect, it would be a lot nicer to use an interface * similar to the vnode VOP interface. */ @@ -65,7 +62,6 @@ struct uio; /* USE THESE FOR YOUR PROTOTYPES ! 
*/ typedef int pr_ctloutput_t(struct socket *, struct sockopt *); -typedef void pr_drain_t(void); typedef void pr_abort_t(struct socket *); typedef int pr_accept_t(struct socket *, struct sockaddr **); typedef int pr_attach_t(struct socket *, int, struct thread *); @@ -117,7 +113,6 @@ struct protosw { /* protocol-protocol hooks */ pr_ctloutput_t *pr_ctloutput; /* control output (from above) */ /* utility hooks */ - pr_drain_t *pr_drain; /* flush any excess space possible */ struct pr_usrreqs *pr_usrreqs; /* user-protocol hook */ }; diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index 82ba3c81ef1b..63d31dc1d135 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -87,6 +87,7 @@ extern int vm_pageout_page_count; */ #define VM_LOW_KMEM 0x01 #define VM_LOW_PAGES 0x02 +#define VM_LOW_MBUFS 0x04 /* * Exported routines.