git: 07285bb4c22c - main - tcp: utilize new solisten_clone() and solisten_enqueue()
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 10 Aug 2022 18:10:28 UTC
The branch main has been updated by glebius: URL: https://cgit.FreeBSD.org/src/commit/?id=07285bb4c22c026a50f69149d5dae03169b15fe4 commit 07285bb4c22c026a50f69149d5dae03169b15fe4 Author: Gleb Smirnoff <glebius@FreeBSD.org> AuthorDate: 2022-08-10 18:09:34 +0000 Commit: Gleb Smirnoff <glebius@FreeBSD.org> CommitDate: 2022-08-10 18:09:34 +0000 tcp: utilize new solisten_clone() and solisten_enqueue() This streamlines cloning of a socket from a listener. Now we do not drop the inpcb lock during creation of a new socket, do not do useless state transitions, and put a fully initialized socket+inpcb+tcpcb into the listen queue. Before this change, first we would allocate the socket and inpcb+tcpcb via tcp_usr_attach() as TCPS_CLOSED, link them into the global list of pcbs, unlock the pcb and put it onto the incomplete queue (see 6f3caa6d815). Then, after sonewconn() we would lock it again, transition into TCPS_SYN_RECEIVED, insert it into the inpcb hash, and finalize initialization of the tcpcb. And then, in the call into tcp_do_segment(), upon transition to TCPS_ESTABLISHED, we would call soisconnected(). This call would lock the listening socket once again with a LOR protection sequence and then we would relocate the socket onto the complete queue, and only then would it be ready for accept(2). Reviewed by: rrs, tuexen Differential revision: https://reviews.freebsd.org/D36064 --- sys/kern/uipc_socket.c | 2 +- sys/netinet/tcp_syncache.c | 130 ++++++++++++++++++++------------------------- sys/netinet/tcp_usrreq.c | 8 ++- sys/sys/socketvar.h | 1 + 4 files changed, 64 insertions(+), 77 deletions(-) diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 73725efceb2d..e9fe4dfd7feb 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -455,7 +455,7 @@ soalloc(struct vnet *vnet) * locks, labels, etc. All protocol state is assumed already to have been * torn down (and possibly never set up) by the caller. 
*/ -static void +void sodealloc(struct socket *so) { diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index ed5981bf85d7..df900280e3df 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -804,44 +804,25 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) /* * Ok, create the full blown connection, and set things up * as they would have been set up if we had created the - * connection when the SYN arrived. If we can't create - * the connection, abort it. + * connection when the SYN arrived. */ - so = sonewconn(lso, 0); - if (so == NULL) { - /* - * Drop the connection; we will either send a RST or - * have the peer retransmit its SYN again after its - * RTO and try again. - */ - TCPSTAT_INC(tcps_listendrop); - if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { - log(LOG_DEBUG, "%s; %s: Socket create failed " - "due to limits or memory shortage\n", - s, __func__); - free(s, M_TCPLOG); - } - goto abort2; - } + if ((so = solisten_clone(lso)) == NULL) + goto allocfail; #ifdef MAC mac_socketpeer_set_from_mbuf(m, so); #endif - + error = in_pcballoc(so, &V_tcbinfo); + if (error) { + sodealloc(so); + goto allocfail; + } inp = sotoinpcb(so); - inp->inp_inc.inc_fibnum = so->so_fibnum; - INP_WLOCK(inp); - /* - * Exclusive pcbinfo lock is not required in syncache socket case even - * if two inpcb locks can be acquired simultaneously: - * - the inpcb in LISTEN state, - * - the newly created inp. - * - * In this case, an inp cannot be at same time in LISTEN state and - * just created by an accept() call. - */ - INP_HASH_WLOCK(&V_tcbinfo); - - /* Insert new socket into PCB hash list. 
*/ + if ((tp = tcp_newtcpcb(inp)) == NULL) { + in_pcbdetach(inp); + in_pcbfree(inp); + sodealloc(so); + goto allocfail; + } inp->inp_inc.inc_flags = sc->sc_inc.inc_flags; #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { @@ -904,16 +885,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) laddr6 = inp->in6p_laddr; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) inp->in6p_laddr = sc->sc_inc.inc6_laddr; - if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6, - thread0.td_ucred, m, false)) != 0) { + INP_HASH_WLOCK(&V_tcbinfo); + error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6, + thread0.td_ucred, m, false); + INP_HASH_WUNLOCK(&V_tcbinfo); + if (error != 0) { inp->in6p_laddr = laddr6; - if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { - log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed " - "with error %i\n", - s, __func__, error); - free(s, M_TCPLOG); - } - INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } /* Override flowlabel from in6_pcbconnect. 
*/ @@ -944,16 +921,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) laddr = inp->inp_laddr; if (inp->inp_laddr.s_addr == INADDR_ANY) inp->inp_laddr = sc->sc_inc.inc_laddr; - if ((error = in_pcbconnect(inp, (struct sockaddr *)&sin, - thread0.td_ucred, false)) != 0) { + INP_HASH_WLOCK(&V_tcbinfo); + error = in_pcbconnect(inp, (struct sockaddr *)&sin, + thread0.td_ucred, false); + INP_HASH_WUNLOCK(&V_tcbinfo); + if (error != 0) { inp->inp_laddr = laddr; - if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { - log(LOG_DEBUG, "%s; %s: in_pcbconnect failed " - "with error %i\n", - s, __func__, error); - free(s, M_TCPLOG); - } - INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } } @@ -963,9 +936,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) if (ipsec_copy_pcbpolicy(sotoinpcb(lso), inp) != 0) printf("syncache_socket: could not copy policy\n"); #endif - INP_HASH_WUNLOCK(&V_tcbinfo); - tp = intotcpcb(inp); - tcp_state_change(tp, TCPS_SYN_RECEIVED); + tp->t_state = TCPS_SYN_RECEIVED; tp->iss = sc->sc_iss; tp->irs = sc->sc_irs; tp->t_port = sc->sc_port; @@ -1066,13 +1037,37 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); TCPSTAT_INC(tcps_accepts); + TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, TCPS_LISTEN); + + solisten_enqueue(so, SS_ISCONNECTED); + return (so); +allocfail: + /* + * Drop the connection; we will either send a RST or have the peer + * retransmit its SYN again after its RTO and try again. 
+ */ + if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Socket create failed " + "due to limits or memory shortage\n", + s, __func__); + free(s, M_TCPLOG); + } + TCPSTAT_INC(tcps_listendrop); + return (NULL); + abort: - INP_WUNLOCK(inp); -abort2: - if (so != NULL) - soabort(so); + in_pcbdetach(inp); + in_pcbfree(inp); + sodealloc(so); + if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: in%s_pcbconnect failed with error %i\n", + s, __func__, (sc->sc_inc.inc_flags & INC_ISIPV6) ? "6" : "", + error); + free(s, M_TCPLOG); + } + TCPSTAT_INC(tcps_listendrop); return (NULL); } @@ -1176,6 +1171,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, return (-1); /* Do not send RST */ } #endif /* TCP_SIGNATURE */ + TCPSTATES_INC(TCPS_SYN_RECEIVED); } else { if (sc->sc_port != port) { SCH_UNLOCK(sch); @@ -1282,17 +1278,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, return (-1); /* Do not send RST */ } } - - /* - * Pull out the entry to unlock the bucket row. - * - * NOTE: We must decrease TCPS_SYN_RECEIVED count here, not - * tcp_state_change(). The tcpcb is not existent at this - * moment. A new one will be allocated via syncache_socket-> - * sonewconn->tcp_usr_attach in TCPS_CLOSED state, then - * syncache_socket() will change it to TCPS_SYN_RECEIVED. 
- */ - TCPSTATES_DEC(TCPS_SYN_RECEIVED); TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); sch->sch_length--; #ifdef TCP_OFFLOAD @@ -1340,8 +1325,11 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, syncache_free(sc); return (1); failed: - if (sc != NULL && sc != &scs) - syncache_free(sc); + if (sc != NULL) { + TCPSTATES_DEC(TCPS_SYN_RECEIVED); + if (sc != &scs) + syncache_free(sc); + } if (s != NULL) free(s, M_TCPLOG); *lsop = NULL; diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 9280e4310bc3..d77f37efad7c 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -172,11 +172,9 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td) KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); TCPDEBUG1(); - if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { - error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace); - if (error) - goto out; - } + error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace); + if (error) + goto out; so->so_rcv.sb_flags |= SB_AUTOSIZE; so->so_snd.sb_flags |= SB_AUTOSIZE; diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index ed797c6d6239..678642eeff6d 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -498,6 +498,7 @@ int soreceive_generic(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); void sorele_locked(struct socket *so); +void sodealloc(struct socket *); int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); void sorflush(struct socket *so); int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,