git: ec52ff6d1411 - main - Streamline the infiniband code according to the ethernet code.
Ryan Libby
rlibby at freebsd.org
Thu Dec 31 02:58:49 UTC 2020
On Tue, Dec 29, 2020 at 9:03 AM Hans Petter Selasky
<hselasky at freebsd.org> wrote:
>
> The branch main has been updated by hselasky:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=ec52ff6d14117573afef970604d5bf6b9691bc88
>
> commit ec52ff6d14117573afef970604d5bf6b9691bc88
> Author: Hans Petter Selasky <hselasky at FreeBSD.org>
> AuthorDate: 2020-12-29 17:01:57 +0000
> Commit: Hans Petter Selasky <hselasky at FreeBSD.org>
> CommitDate: 2020-12-29 17:01:57 +0000
>
> Streamline the infiniband code according to the ethernet code.
>
> Specifically implement the if_requestencap callback function for infiniband.
> Most of the changes are simply a cut and paste of the equivalent ethernet part.
>
> Reviewed by: melifaro @
> Differential Revision: https://reviews.freebsd.org/D27631
> MFC after: 1 week
> Sponsored by: Mellanox Technologies // NVIDIA Networking
> ---
> sys/net/if_infiniband.c | 303 ++++++++++++++++++++++++++++++++----------------
> 1 file changed, 201 insertions(+), 102 deletions(-)
>
> diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c
> index 1d6d561c4d83..bf33457b0a4f 100644
> --- a/sys/net/if_infiniband.c
> +++ b/sys/net/if_infiniband.c
> @@ -143,141 +143,240 @@ infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
> mb->m_pkthdr.len += sizeof(*ibh);
> }
>
> +static void
> +update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
> +{
> + int csum_flags = 0;
> +
> + if (src->m_pkthdr.csum_flags & CSUM_IP)
> + csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
> + if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
> + csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
> + if (src->m_pkthdr.csum_flags & CSUM_SCTP)
> + csum_flags |= CSUM_SCTP_VALID;
> + dst->m_pkthdr.csum_flags |= csum_flags;
> + if (csum_flags & CSUM_DATA_VALID)
> + dst->m_pkthdr.csum_data = 0xffff;
> +}
> +
> /*
> - * Infiniband output routine.
> + * Handle link-layer encapsulation requests.
> */
> static int
> -infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
> - struct route *ro)
> +infiniband_requestencap(struct ifnet *ifp, struct if_encap_req *req)
> {
> - uint8_t edst[INFINIBAND_ADDR_LEN];
> -#if defined(INET) || defined(INET6)
> - struct llentry *lle = NULL;
> -#endif
> - struct infiniband_header *ibh;
> - int error = 0;
> - uint16_t type;
> - bool is_gw;
> + struct infiniband_header *ih;
> + struct arphdr *ah;
> + uint16_t etype;
> + const uint8_t *lladdr;
>
> - NET_EPOCH_ASSERT();
> -
> - is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
> + if (req->rtype != IFENCAP_LL)
> + return (EOPNOTSUPP);
>
> -#ifdef MAC
> - error = mac_ifnet_check_transmit(ifp, m);
> - if (error)
> - goto bad;
> -#endif
> + if (req->bufsize < INFINIBAND_HDR_LEN)
> + return (ENOMEM);
>
> - M_PROFILE(m);
> - if (ifp->if_flags & IFF_MONITOR) {
> - error = ENETDOWN;
> - goto bad;
> - }
> - if (!((ifp->if_flags & IFF_UP) &&
> - (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
> - error = ENETDOWN;
> - goto bad;
> - }
> + ih = (struct infiniband_header *)req->buf;
> + lladdr = req->lladdr;
> + req->lladdr_off = 0;
>
> - switch (dst->sa_family) {
> - case AF_LINK:
> - goto output;
> -#ifdef INET
> + switch (req->family) {
> case AF_INET:
> - if (lle != NULL && (lle->la_flags & LLE_VALID)) {
> - memcpy(edst, lle->ll_addr, sizeof(edst));
> - } else if (m->m_flags & M_MCAST) {
> - infiniband_ipv4_multicast_map(
> - ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
> - ifp->if_broadcastaddr, edst);
> - } else {
> - error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
> - if (error) {
> - if (error == EWOULDBLOCK)
> - error = 0;
> - m = NULL; /* mbuf is consumed by resolver */
> - goto bad;
> - }
> - }
> - type = htons(ETHERTYPE_IP);
> + etype = htons(ETHERTYPE_IP);
> break;
> - case AF_ARP: {
> - struct arphdr *ah;
> -
> - if (m->m_len < sizeof(*ah)) {
> - error = EINVAL;
> - goto bad;
> - }
> -
> - ah = mtod(m, struct arphdr *);
> -
> - if (m->m_len < arphdr_len(ah)) {
> - error = EINVAL;
> - goto bad;
> - }
> + case AF_INET6:
> + etype = htons(ETHERTYPE_IPV6);
> + break;
> + case AF_ARP:
> + ah = (struct arphdr *)req->hdata;
> ah->ar_hrd = htons(ARPHRD_INFINIBAND);
>
> switch (ntohs(ah->ar_op)) {
> case ARPOP_REVREQUEST:
> case ARPOP_REVREPLY:
> - type = htons(ETHERTYPE_REVARP);
> + etype = htons(ETHERTYPE_REVARP);
> break;
> case ARPOP_REQUEST:
> case ARPOP_REPLY:
> default:
> - type = htons(ETHERTYPE_ARP);
> + etype = htons(ETHERTYPE_ARP);
> break;
> }
>
> - if (m->m_flags & M_BCAST) {
> - memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
> + if (req->flags & IFENCAP_FLAG_BROADCAST)
> + lladdr = ifp->if_broadcastaddr;
> + break;
> + default:
> + return (EAFNOSUPPORT);
> + }
> +
> + ih->ib_protocol = etype;
> + ih->ib_reserved = 0;
> + memcpy(ih->ib_hwaddr, lladdr, INFINIBAND_ADDR_LEN);
> + req->bufsize = sizeof(struct infiniband_header);
> +
> + return (0);
> +}
> +
> +static int
> +infiniband_resolve_addr(struct ifnet *ifp, struct mbuf *m,
> + const struct sockaddr *dst, struct route *ro, uint8_t *phdr,
> + uint32_t *pflags, struct llentry **plle)
> +{
> + struct infiniband_header *ih;
> + uint32_t lleflags = 0;
> + int error = 0;
> +
> + if (plle)
> + *plle = NULL;
> + ih = (struct infiniband_header *)phdr;
> +
> + switch (dst->sa_family) {
> +#ifdef INET
> + case AF_INET:
> + if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
> + error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle);
> } else {
> - if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
> - error = EINVAL;
> - goto bad;
> + if (m->m_flags & M_BCAST) {
> + memcpy(ih->ib_hwaddr, ifp->if_broadcastaddr,
> + INFINIBAND_ADDR_LEN);
> + } else {
> + infiniband_ipv4_multicast_map(
> + ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
> + ifp->if_broadcastaddr, ih->ib_hwaddr);
> }
> - memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
> + ih->ib_protocol = htons(ETHERTYPE_IP);
> + ih->ib_reserved = 0;
> }
> break;
> - }
> #endif
> #ifdef INET6
> - case AF_INET6: {
> - const struct ip6_hdr *ip6;
> -
> - ip6 = mtod(m, const struct ip6_hdr *);
> - if (m->m_len < sizeof(*ip6)) {
> - error = EINVAL;
> - goto bad;
> - } else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
> - memcpy(edst, lle->ll_addr, sizeof(edst));
> - } else if (m->m_flags & M_MCAST) {
> + case AF_INET6:
> + if ((m->m_flags & M_MCAST) == 0) {
> + error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, plle);
> + } else {
> infiniband_ipv6_multicast_map(
> &((const struct sockaddr_in6 *)dst)->sin6_addr,
> - ifp->if_broadcastaddr, edst);
> - } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
> - memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
> - } else {
> - error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
> - if (error) {
> - if (error == EWOULDBLOCK)
> - error = 0;
> - m = NULL; /* mbuf is consumed by resolver */
> - goto bad;
> - }
> + ifp->if_broadcastaddr, ih->ib_hwaddr);
> + ih->ib_protocol = htons(ETHERTYPE_IPV6);
> + ih->ib_reserved = 0;
> }
> - type = htons(ETHERTYPE_IPV6);
> break;
> - }
> #endif
> default:
> - error = EAFNOSUPPORT;
> + if_printf(ifp, "can't handle af%d\n", dst->sa_family);
> + if (m != NULL)
> + m_freem(m);
> + return (EAFNOSUPPORT);
> + }
> +
> + if (error == EHOSTDOWN) {
> + if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
> + error = EHOSTUNREACH;
> + }
> +
> + if (error != 0)
> + return (error);
> +
> + *pflags = RT_MAY_LOOP;
> + if (lleflags & LLE_IFADDR)
> + *pflags |= RT_L2_ME;
> +
> + return (0);
> +}
> +
> +/*
> + * Infiniband output routine.
> + */
> +static int
> +infiniband_output(struct ifnet *ifp, struct mbuf *m,
> + const struct sockaddr *dst, struct route *ro)
> +{
> + uint8_t linkhdr[INFINIBAND_HDR_LEN];
> + uint8_t *phdr;
> +#if defined(INET) || defined(INET6)
> + struct llentry *lle = NULL;
> +#endif
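This broke tinderbox via the LINT-NOIP kernels: `lle` is declared only
under this ifdef guard, but infiniband_output() uses it unconditionally
further down, so kernels built without INET and INET6 fail to compile.
The code compiles fine with the ifdef guard removed. Can we just delete it?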
> + struct infiniband_header *ih;
> + int error = 0;
> + int hlen; /* link layer header length */
> + uint32_t pflags;
> + bool addref;
> +
> + NET_EPOCH_ASSERT();
> +
> + addref = false;
> + phdr = NULL;
> + pflags = 0;
> + if (ro != NULL) {
> + /* XXX BPF uses ro_prepend */
> + if (ro->ro_prepend != NULL) {
> + phdr = ro->ro_prepend;
> + hlen = ro->ro_plen;
> + } else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
> + if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
> + lle = ro->ro_lle;
> + if (lle != NULL &&
> + (lle->la_flags & LLE_VALID) == 0) {
> + LLE_FREE(lle);
> + lle = NULL; /* redundant */
> + ro->ro_lle = NULL;
> + }
> + if (lle == NULL) {
> + /* if we lookup, keep cache */
> + addref = 1;
> + } else
> + /*
> + * Notify LLE code that
> + * the entry was used
> + * by datapath.
> + */
> + llentry_mark_used(lle);
> + }
> + if (lle != NULL) {
> + phdr = lle->r_linkdata;
> + hlen = lle->r_hdrlen;
> + pflags = lle->r_flags;
> + }
> + }
> + }
> +
> +#ifdef MAC
> + error = mac_ifnet_check_transmit(ifp, m);
> + if (error)
> + goto bad;
> +#endif
> +
> + M_PROFILE(m);
> + if (ifp->if_flags & IFF_MONITOR) {
> + error = ENETDOWN;
> + goto bad;
> + }
> + if (!((ifp->if_flags & IFF_UP) &&
> + (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
> + error = ENETDOWN;
> goto bad;
> }
>
> + if (phdr == NULL) {
> + /* No prepend data supplied. Try to calculate ourselves. */
> + phdr = linkhdr;
> + hlen = INFINIBAND_HDR_LEN;
> + error = infiniband_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
> + addref ? &lle : NULL);
> + if (addref && lle != NULL)
> + ro->ro_lle = lle;
> + if (error != 0)
> + return (error == EWOULDBLOCK ? 0 : error);
> + }
> +
> + if ((pflags & RT_L2_ME) != 0) {
> + update_mbuf_csumflags(m, m);
> + return (if_simloop(ifp, m, dst->sa_family, 0));
> + }
> +
> /*
> - * Add local net header. If no space in first mbuf,
> + * Add local infiniband header. If no space in first mbuf,
> * allocate another.
> */
> M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
> @@ -285,16 +384,15 @@ infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
> error = ENOBUFS;
> goto bad;
> }
> - ibh = mtod(m, struct infiniband_header *);
> -
> - ibh->ib_protocol = type;
> - memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
> + if ((pflags & RT_HAS_HEADER) == 0) {
> + ih = mtod(m, struct infiniband_header *);
> + memcpy(ih, phdr, hlen);
> + }
>
> /*
> * Queue message on interface, update output statistics if
> * successful, and start output if interface not yet active.
> */
> -output:
> return (ifp->if_transmit(ifp, m));
> bad:
> if (m != NULL)
> @@ -484,6 +582,7 @@ infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
> ifp->if_output = infiniband_output;
> ifp->if_input = infiniband_input;
> ifp->if_resolvemulti = infiniband_resolvemulti;
> + ifp->if_requestencap = infiniband_requestencap;
>
> if (ifp->if_baudrate == 0)
> ifp->if_baudrate = IF_Gbps(10); /* default value */
More information about the dev-commits-src-main
mailing list