git: 051e7d78b039 - main - Kernel-side infrastructure to implement nvlist-based set/get ifcaps
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 24 May 2022 20:59:43 UTC
The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=051e7d78b03944d5910d4f7ad2f1fd6f2cfac382 commit 051e7d78b03944d5910d4f7ad2f1fd6f2cfac382 Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2021-10-17 15:00:34 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2022-05-24 20:59:32 +0000 Kernel-side infrastructure to implement nvlist-based set/get ifcaps Reviewed by: hselasky, jhb, kp (previous version) Sponsored by: NVIDIA Networking MFC after: 3 weeks Differential revision: https://reviews.freebsd.org/D32551 --- sys/net/if.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- sys/net/if.h | 59 +++++++++++++++++- sys/sys/sockio.h | 3 + 3 files changed, 236 insertions(+), 4 deletions(-) diff --git a/sys/net/if.c b/sys/net/if.c index bc0240035ea3..c50cc2d291e2 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -58,6 +58,7 @@ #include <sys/lock.h> #include <sys/refcount.h> #include <sys/module.h> +#include <sys/nv.h> #include <sys/rwlock.h> #include <sys/sockio.h> #include <sys/syslog.h> @@ -2391,6 +2392,88 @@ ifr_data_get_ptr(void *ifrp) return (ifrup->ifr.ifr_ifru.ifru_data); } +struct ifcap_nv_bit_name { + int cap_bit; + const char *cap_name; +}; +#define CAPNV(x) {.cap_bit = IFCAP_##x, \ + .cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) } +const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = { + CAPNV(RXCSUM), + CAPNV(TXCSUM), + CAPNV(NETCONS), + CAPNV(VLAN_MTU), + CAPNV(VLAN_HWTAGGING), + CAPNV(JUMBO_MTU), + CAPNV(POLLING), + CAPNV(VLAN_HWCSUM), + CAPNV(TSO4), + CAPNV(TSO6), + CAPNV(LRO), + CAPNV(WOL_UCAST), + CAPNV(WOL_MCAST), + CAPNV(WOL_MAGIC), + CAPNV(TOE4), + CAPNV(TOE6), + CAPNV(VLAN_HWFILTER), + CAPNV(VLAN_HWTSO), + CAPNV(LINKSTATE), + CAPNV(NETMAP), + CAPNV(RXCSUM_IPV6), + CAPNV(TXCSUM_IPV6), + CAPNV(HWSTATS), + CAPNV(TXRTLMT), + CAPNV(HWRXTSTMP), + CAPNV(MEXTPG), + CAPNV(TXTLS4), + CAPNV(TXTLS6), + CAPNV(VXLAN_HWCSUM), + CAPNV(VXLAN_HWTSO), + CAPNV(TXTLS_RTLMT), + {0, NULL} +}; +#define CAP2NV(x) {.cap_bit = IFCAP2_##x, \ + .cap_name = __CONCAT(IFCAP2_, __CONCAT(x, _NAME)) } +const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = { + CAP2NV(RXTLS4), + CAP2NV(RXTLS6), + {0, NULL} +}; +#undef CAPNV +#undef CAP2NV + +int +if_capnv_to_capint(const nvlist_t *nv, int *old_cap, + const struct ifcap_nv_bit_name *nn, bool all) +{ + int i, res; + + res = 0; + for (i = 0; nn[i].cap_name != NULL; i++) { + if (nvlist_exists_bool(nv, nn[i].cap_name)) { + if (all || nvlist_get_bool(nv, nn[i].cap_name)) + res |= nn[i].cap_bit; + } else { + res |= *old_cap & nn[i].cap_bit; + } + } + return (res); +} + +void +if_capint_to_capnv(nvlist_t *nv, const struct ifcap_nv_bit_name *nn, + int ifr_cap, int ifr_req) +{ + int i; + + for (i = 0; nn[i].cap_name != NULL; i++) { + if ((nn[i].cap_bit & ifr_cap) != 0) { + nvlist_add_bool(nv, nn[i].cap_name, + (nn[i].cap_bit & ifr_req) != 0); + } + } +} + /* * Hardware specific interface ioctls. */ @@ -2401,12 +2484,15 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) int error = 0, do_ifup = 0; int new_flags, temp_flags; size_t namelen, onamelen; - size_t descrlen; + size_t descrlen, nvbuflen; char *descrbuf, *odescrbuf; char new_name[IFNAMSIZ]; char old_name[IFNAMSIZ], strbuf[IFNAMSIZ + 8]; struct ifaddr *ifa; struct sockaddr_dl *sdl; + void *buf; + nvlist_t *nvcap; + struct siocsifcapnv_driver_data drv_ioctl_data; ifr = (struct ifreq *)data; switch (cmd) { @@ -2425,6 +2511,47 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) ifr->ifr_curcap = ifp->if_capenable; break; + case SIOCGIFCAPNV: + if ((ifp->if_capabilities & IFCAP_NV) == 0) { + error = EINVAL; + break; + } + buf = NULL; + nvcap = nvlist_create(0); + for (;;) { + if_capint_to_capnv(nvcap, ifcap_nv_bit_names, + ifp->if_capabilities, ifp->if_capenable); + if_capint_to_capnv(nvcap, ifcap2_nv_bit_names, + ifp->if_capabilities2, ifp->if_capenable2); + error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV, + __DECONST(caddr_t, nvcap)); + if (error != 0) { + if_printf(ifp, + "SIOCGIFCAPNV driver mistake: nvlist error %d\n", + error); + break; + } + buf = nvlist_pack(nvcap, &nvbuflen); + if (buf == NULL) { + error = nvlist_error(nvcap); + if (error == 0) + error = EDOOFUS; + break; + } + if (nvbuflen > ifr->ifr_cap_nv.buf_length) { + ifr->ifr_cap_nv.length = nvbuflen; + ifr->ifr_cap_nv.buffer = NULL; + error = EFBIG; + break; + } + ifr->ifr_cap_nv.length = nvbuflen; + error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen); + break; + } + free(buf, M_NVLIST); + nvlist_destroy(nvcap); + break; + case SIOCGIFDATA: { struct if_data ifd; @@ -2563,7 +2690,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) case SIOCSIFCAP: error = priv_check(td, PRIV_NET_SETIFCAP); - if (error) + if (error != 0) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); @@ -2574,6 +2701,53 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) getmicrotime(&ifp->if_lastchange); break; + case SIOCSIFCAPNV: + error = priv_check(td, PRIV_NET_SETIFCAP); + if (error != 0) + return (error); + if (ifp->if_ioctl == NULL) + return (EOPNOTSUPP); + if ((ifp->if_capabilities & IFCAP_NV) == 0) + return (EINVAL); + if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) + return (EINVAL); + nvcap = NULL; + buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); + for (;;) { + error = copyin(ifr->ifr_cap_nv.buffer, buf, + ifr->ifr_cap_nv.length); + if (error != 0) + break; + nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0); + if (nvcap == NULL) { + error = EINVAL; + break; + } + drv_ioctl_data.reqcap = if_capnv_to_capint(nvcap, + &ifp->if_capenable, ifcap_nv_bit_names, false); + if ((drv_ioctl_data.reqcap & + ~ifp->if_capabilities) != 0) { + error = EINVAL; + break; + } + drv_ioctl_data.reqcap2 = if_capnv_to_capint(nvcap, + &ifp->if_capenable2, ifcap2_nv_bit_names, false); + if ((drv_ioctl_data.reqcap2 & + ~ifp->if_capabilities2) != 0) { + error = EINVAL; + break; + } + drv_ioctl_data.nvcap = nvcap; + error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV, + (caddr_t)&drv_ioctl_data); + break; + } + nvlist_destroy(nvcap); + free(buf, M_TEMP); + if (error == 0) + getmicrotime(&ifp->if_lastchange); + break; + #ifdef MAC case SIOCSIFMAC: error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); diff --git a/sys/net/if.h b/sys/net/if.h index 782e792cf87c..4bf29193e7ce 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -236,7 +236,7 @@ struct if_data { #define IFCAP_TOE4 0x04000 /* interface can offload TCP */ #define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */ #define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */ -/* available 0x20000 */ +#define IFCAP_NV 0x20000 /* can do SIOCGIFCAPNV/SIOCSIFCAPNV */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ @@ -260,7 +260,40 @@ struct if_data { #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) #define IFCAP_TXTLS (IFCAP_TXTLS4 | IFCAP_TXTLS6) -#define IFCAP_CANTCHANGE (IFCAP_NETMAP) +#define IFCAP_CANTCHANGE (IFCAP_NETMAP | IFCAP_NV) +#define IFCAP_ALLCAPS 0xffffffff + +#define IFCAP_RXCSUM_NAME "RXCSUM" +#define IFCAP_TXCSUM_NAME "TXCSUM" +#define IFCAP_NETCONS_NAME "NETCONS" +#define IFCAP_VLAN_MTU_NAME "VLAN_MTU" +#define IFCAP_VLAN_HWTAGGING_NAME "VLAN_HWTAGGING" +#define IFCAP_JUMBO_MTU_NAME "JUMBO_MTU" +#define IFCAP_POLLING_NAME "POLLING" +#define IFCAP_VLAN_HWCSUM_NAME "VLAN_HWCSUM" +#define IFCAP_TSO4_NAME "TSO4" +#define IFCAP_TSO6_NAME "TSO6" +#define IFCAP_LRO_NAME "LRO" +#define IFCAP_WOL_UCAST_NAME "WOL_UCAST" +#define IFCAP_WOL_MCAST_NAME "WOL_MCAST" +#define IFCAP_WOL_MAGIC_NAME "WOL_MAGIC" +#define IFCAP_TOE4_NAME "TOE4" +#define IFCAP_TOE6_NAME "TOE6" +#define IFCAP_VLAN_HWFILTER_NAME "VLAN_HWFILTER" +#define IFCAP_VLAN_HWTSO_NAME "VLAN_HWTSO" +#define IFCAP_LINKSTATE_NAME "LINKSTATE" +#define IFCAP_NETMAP_NAME "NETMAP" +#define IFCAP_RXCSUM_IPV6_NAME "RXCSUM_IPV6" +#define IFCAP_TXCSUM_IPV6_NAME "TXCSUM_IPV6" +#define IFCAP_HWSTATS_NAME "HWSTATS" +#define IFCAP_TXRTLMT_NAME "TXRTLMT" +#define IFCAP_HWRXTSTMP_NAME "HWRXTSTMP" +#define IFCAP_MEXTPG_NAME "MEXTPG" +#define IFCAP_TXTLS4_NAME "TXTLS4" +#define IFCAP_TXTLS6_NAME "TXTLS6" +#define IFCAP_VXLAN_HWCSUM_NAME "VXLAN_HWCSUM" +#define IFCAP_VXLAN_HWTSO_NAME "VXLAN_HWTSO" +#define IFCAP_TXTLS_RTLMT_NAME "TXTLS_RTLMT" #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ @@ -387,6 +420,15 @@ struct ifreq_buffer { void *buffer; }; +struct ifreq_nv_req { + u_int buf_length; /* Total size of buffer, + u_int for ABI struct ifreq */ + u_int length; /* Length of the filled part */ + void *buffer; /* Buffer itself, containing packed nv */ +}; + +#define IFR_CAP_NV_MAXBUFSIZE (2 * 1024 * 1024) + /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter @@ -411,6 +453,7 @@ struct ifreq { int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; + struct ifreq_nv_req ifru_nv; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ @@ -434,6 +477,7 @@ struct ifreq { #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ #define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ #define ifr_lan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ +#define ifr_cap_nv ifr_ifru.ifru_nv /* nv-based cap interface */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ @@ -605,6 +649,17 @@ MALLOC_DECLARE(M_IFMADDR); extern struct sx ifnet_detach_sxlock; +struct nvlist; +struct ifcap_nv_bit_name; +int if_capnv_to_capint(const struct nvlist *nv, int *old_cap, + const struct ifcap_nv_bit_name *nn, bool all); +void if_capint_to_capnv(struct nvlist *nv, + const struct ifcap_nv_bit_name *nn, int ifr_cap, int ifr_req); +struct siocsifcapnv_driver_data { + int reqcap; + int reqcap2; + struct nvlist *nvcap; +}; #endif #ifndef _KERNEL diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index 93b8af28e171..b9ed4a439995 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -147,4 +147,7 @@ #define SIOCGIFDOWNREASON _IOWR('i', 154, struct ifdownreason) +#define SIOCSIFCAPNV _IOW('i', 155, struct ifreq) /* set IF features */ +#define SIOCGIFCAPNV _IOWR('i', 156, struct ifreq) /* get IF features */ + #endif /* !_SYS_SOCKIO_H_ */