git: ef2a572bf6bd - main - ipsec_offload: kernel infrastructure
Date: Fri, 12 Jul 2024 11:25:08 UTC
The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=ef2a572bf6bdcac97ef29ce631d2f50f938e1ec8 commit ef2a572bf6bdcac97ef29ce631d2f50f938e1ec8 Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2021-08-22 19:38:04 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2024-07-12 04:27:58 +0000 ipsec_offload: kernel infrastructure Inline IPSEC offload moves almost the whole of IPSEC processing from the CPU/MCU, and possibly the crypto accelerator, to the network card. The transmitted packet content is not touched by the CPU during TX operations; the kernel only does the required policy and security association lookups to find out that a given flow is offloaded, and then the packet is transmitted as plain text to the card. For driver convenience, metadata is attached to the packet identifying the SA which must process the packet. The card does encryption of the payload, padding, calculates authentication, and does the reformat according to the policy. Similarly, on receive, the card does the decapsulation, decryption, and authentication. The kernel receives the identifier of the SA that was used to process the packet, together with the plain-text packet. Overall, payload octets are only read or written by the card DMA engine, removing a lot of memory subsystem overhead, and saving CPU time because IPSEC algorithm calculations are avoided. If a driver declares support for inline IPSEC offload (with the IFCAP2_IPSEC_OFFLOAD capability set and registering the method table struct if_ipsec_accel_methods), the kernel offers the SPD and SAD to the driver. The driver decides which policies and SAs can be offloaded based on hardware capacity, and acks/nacks each SA for the given interface to the kernel. The kernel needs to keep this information to make a decision to skip software processing on TX, and to assume processing was already done on RX. 
This shadow SPD/SAD database of offloads is rooted at policies (struct secpolicy accel_ifps, struct ifp_handle_sp) and SAs (struct secasvar accel_ifps, struct ifp_handle_sav). Some extensions to the PF_KEY socket allow limiting the interfaces for which a given SP/SA could be offloaded (proposed for offload). Also, additional statistics extensions allow observing allocation/octet/use counters for a specific SA. Since SPs and SAs are typically instantiated in non-sleepable context, while offloading them into the card is expected to require costly async manipulations of the card state, calls to the driver for offload and termination are executed in the threaded taskqueue. It also solves the issue of allocating resources needed for the offload database. Neither ifp_handle_sp nor ifp_handle_sav adds a reference to the owning SP/SA; the offload must be terminated before the last reference is dropped. ipsec_accel only adds transient references to ensure safe pointer ownership by the taskqueue. Maintaining the SA counters for hardware-accelerated packets is the duty of the driver. The helper ipsec_accel_drv_sa_lifetime_update() is provided to hide the accel infrastructure from drivers, which are expected to use a callout to query the hardware periodically for updates. 
Reviewed by: rscheff (transport, stack integration), np Sponsored by: NVIDIA networking Differential revision: https://reviews.freebsd.org/D44219 --- sys/conf/files | 2 + sys/conf/options | 1 + sys/modules/ipsec/Makefile | 5 +- sys/netipsec/ipsec.c | 17 + sys/netipsec/ipsec.h | 11 + sys/netipsec/ipsec_input.c | 11 + sys/netipsec/ipsec_offload.c | 1061 ++++++++++++++++++++++++++++++++++++++++++ sys/netipsec/ipsec_offload.h | 191 ++++++++ sys/netipsec/ipsec_output.c | 15 + sys/netipsec/ipsec_pcb.c | 38 +- sys/netipsec/key.c | 270 ++++++++++- sys/netipsec/key.h | 6 + sys/netipsec/key_debug.c | 5 + sys/netipsec/keydb.h | 14 + 14 files changed, 1628 insertions(+), 19 deletions(-) diff --git a/sys/conf/files b/sys/conf/files index 609ac407d400..1f99c3586b86 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4464,6 +4464,8 @@ netipsec/ipsec.c optional ipsec inet | ipsec inet6 netipsec/ipsec_input.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mbuf.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mod.c optional ipsec inet | ipsec inet6 +netipsec/ipsec_offload.c optional ipsec ipsec_offload inet | \ + ipsec ipsec_offload inet6 netipsec/ipsec_output.c optional ipsec inet | ipsec inet6 netipsec/ipsec_pcb.c optional ipsec inet | ipsec inet6 | \ ipsec_support inet | ipsec_support inet6 diff --git a/sys/conf/options b/sys/conf/options index f50d009987bc..928927fe99df 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -466,6 +466,7 @@ IPFIREWALL_PMOD opt_ipfw.h IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h IPSEC_SUPPORT opt_ipsec.h +IPSEC_OFFLOAD opt_ipsec.h IPSTEALTH KERN_TLS KRPC diff --git a/sys/modules/ipsec/Makefile b/sys/modules/ipsec/Makefile index 08a2e88d5794..8979508375a4 100644 --- a/sys/modules/ipsec/Makefile +++ b/sys/modules/ipsec/Makefile @@ -2,8 +2,9 @@ .PATH: ${SRCTOP}/sys/net ${SRCTOP}/sys/netipsec KMOD= ipsec -SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c ipsec_mod.c \ - ipsec_output.c xform_ah.c xform_esp.c xform_ipcomp.c \ +SRCS= if_ipsec.c 
ipsec.c ipsec_input.c ipsec_mbuf.c \ + ipsec_mod.c ipsec_offload.c ipsec_output.c \ + xform_ah.c xform_esp.c xform_ipcomp.c \ opt_inet.h opt_inet6.h opt_ipsec.h opt_kern_tls.h opt_sctp.h .if "${MK_INET}" != "no" || "${MK_INET6}" != "no" SRCS+= udpencap.c diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index 0ca33424bca8..e22a3872d48d 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -85,6 +85,7 @@ #ifdef INET6 #include <netipsec/ipsec6.h> #endif +#include <netipsec/ipsec_offload.h> #include <netipsec/ah_var.h> #include <netipsec/esp_var.h> #include <netipsec/ipcomp.h> /*XXX*/ @@ -636,8 +637,16 @@ int ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; +#ifdef IPSEC_OFFLOAD + struct ipsec_accel_in_tag *tag; +#endif int result; +#ifdef IPSEC_OFFLOAD + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); @@ -802,8 +811,16 @@ int ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; +#ifdef IPSEC_OFFLOAD + struct ipsec_accel_in_tag *tag; +#endif int result; +#ifdef IPSEC_OFFLOAD + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 2a1dcb8bb77b..55cc0839eab9 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -71,6 +71,12 @@ struct ipsecrequest { u_int level; /* IPsec level defined below. 
*/ }; +struct ipsec_accel_adddel_sp_tq { + struct vnet *adddel_vnet; + struct task adddel_task; + int adddel_scheduled; +}; + /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; @@ -102,6 +108,11 @@ struct secpolicy { time_t lastused; /* updated every when kernel sends a packet */ long lifetime; /* duration of the lifetime of this policy */ long validtime; /* duration this policy is valid without use */ + CK_LIST_HEAD(, ifp_handle_sp) accel_ifps; + struct ipsec_accel_adddel_sp_tq accel_add_tq; + struct ipsec_accel_adddel_sp_tq accel_del_tq; + struct inpcb *ipsec_accel_add_sp_inp; + const char *accel_ifname; }; /* diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 1150f3f470d3..dbb20748cf45 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -90,6 +90,7 @@ #include <netipsec/esp.h> #include <netipsec/esp_var.h> #include <netipsec/ipcomp_var.h> +#include <netipsec/ipsec_offload.h> #include <netipsec/key.h> #include <netipsec/keydb.h> @@ -237,6 +238,11 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) int ipsec4_input(struct mbuf *m, int offset, int proto) { + int error; + + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return (error); switch (proto) { case IPPROTO_AH: @@ -536,7 +542,12 @@ ipsec6_lasthdr(int proto) int ipsec6_input(struct mbuf *m, int offset, int proto) { + int error; + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return (error); + switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c new file mode 100644 index 000000000000..851bacaf4ea1 --- /dev/null +++ b/sys/netipsec/ipsec_offload.c @@ -0,0 +1,1061 @@ +/*- + * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/ck.h> +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/pctrie.h> +#include <sys/proc.h> +#include <sys/socket.h> +#include <sys/protosw.h> +#include <sys/taskqueue.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/vnet.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip6.h> +#include <netinet6/ip6_var.h> +#include <netinet/in_pcb.h> + +#include <netipsec/key.h> +#include <netipsec/keydb.h> +#include <netipsec/key_debug.h> +#include <netipsec/xform.h> +#include <netipsec/ipsec.h> +#include <netipsec/ipsec_offload.h> +#include <netipsec/ah_var.h> +#include <netipsec/esp.h> +#include <netipsec/esp_var.h> +#include <netipsec/ipcomp_var.h> + +#ifdef IPSEC_OFFLOAD + +static struct mtx ipsec_accel_sav_tmp; +static struct unrhdr *drv_spi_unr; +static struct mtx ipsec_accel_cnt_lock; + +struct ipsec_accel_install_newkey_tq { + struct secasvar *sav; + struct vnet *install_vnet; + struct task install_task; +}; + +struct ipsec_accel_forget_tq { + struct vnet *forget_vnet; + struct task forget_task; + struct secasvar *sav; +}; + +struct ifp_handle_sav { + CK_LIST_ENTRY(ifp_handle_sav) sav_link; + CK_LIST_ENTRY(ifp_handle_sav) sav_allh_link; + struct secasvar *sav; + struct ifnet *ifp; + void *ifdata; + uint64_t drv_spi; + uint32_t flags; + size_t hdr_ext_size; + uint64_t cnt_octets; + uint64_t cnt_allocs; +}; + +#define IFP_HS_HANDLED 0x00000001 +#define IFP_HS_REJECTED 0x00000002 +#define IFP_HS_INPUT 0x00000004 +#define IFP_HS_OUTPUT 0x00000008 +#define IFP_HS_MARKER 0x00000010 + +static CK_LIST_HEAD(, ifp_handle_sav) ipsec_accel_all_sav_handles; + +struct ifp_handle_sp { + CK_LIST_ENTRY(ifp_handle_sp) sp_link; + CK_LIST_ENTRY(ifp_handle_sp) sp_allh_link; + struct secpolicy *sp; + struct ifnet *ifp; + void *ifdata; + uint32_t flags; +}; + +#define 
IFP_HP_HANDLED 0x00000001 +#define IFP_HP_REJECTED 0x00000002 +#define IFP_HP_MARKER 0x00000004 + +static CK_LIST_HEAD(, ifp_handle_sp) ipsec_accel_all_sp_handles; + +static void * +drvspi_sa_trie_alloc(struct pctrie *ptree) +{ + void *res; + + res = malloc(pctrie_node_size(), M_IPSEC_MISC, M_ZERO | M_NOWAIT); + if (res != NULL) + pctrie_zone_init(res, 0, 0); + return (res); +} + +static void +drvspi_sa_trie_free(struct pctrie *ptree, void *node) +{ + free(node, M_IPSEC_MISC); +} + +PCTRIE_DEFINE(DRVSPI_SA, ifp_handle_sav, drv_spi, + drvspi_sa_trie_alloc, drvspi_sa_trie_free); +static struct pctrie drv_spi_pctrie; + +static void ipsec_accel_sa_newkey_impl(struct secasvar *sav); +static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, + u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires); +static void ipsec_accel_forget_sav_clear(struct secasvar *sav); +static struct ifp_handle_sav *ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, + struct ifnet *ifp); +static int ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +static void ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m); +static void ipsec_accel_sync_imp(void); +static bool ipsec_accel_is_accel_sav_impl(struct secasvar *sav); +static struct mbuf *ipsec_accel_key_setaccelif_impl(struct secasvar *sav); + +static void +ipsec_accel_init(void *arg) +{ + mtx_init(&ipsec_accel_sav_tmp, "ipasat", MTX_DEF, 0); + mtx_init(&ipsec_accel_cnt_lock, "ipascn", MTX_DEF, 0); + drv_spi_unr = new_unrhdr(IPSEC_ACCEL_DRV_SPI_MIN, + IPSEC_ACCEL_DRV_SPI_MAX, &ipsec_accel_sav_tmp); + ipsec_accel_sa_newkey_p = ipsec_accel_sa_newkey_impl; + ipsec_accel_forget_sav_p = ipsec_accel_forget_sav_impl; + ipsec_accel_spdadd_p = ipsec_accel_spdadd_impl; + ipsec_accel_spddel_p = ipsec_accel_spddel_impl; + ipsec_accel_sa_lifetime_op_p = ipsec_accel_sa_lifetime_op_impl; + 
ipsec_accel_sync_p = ipsec_accel_sync_imp; + ipsec_accel_is_accel_sav_p = ipsec_accel_is_accel_sav_impl; + ipsec_accel_key_setaccelif_p = ipsec_accel_key_setaccelif_impl; + pctrie_init(&drv_spi_pctrie); +} +SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY, + ipsec_accel_init, NULL); + +static void +ipsec_accel_fini(void *arg) +{ + ipsec_accel_sa_newkey_p = NULL; + ipsec_accel_forget_sav_p = NULL; + ipsec_accel_spdadd_p = NULL; + ipsec_accel_spddel_p = NULL; + ipsec_accel_sa_lifetime_op_p = NULL; + ipsec_accel_sync_p = NULL; + ipsec_accel_is_accel_sav_p = NULL; + ipsec_accel_key_setaccelif_p = NULL; + ipsec_accel_sync_imp(); + clean_unrhdr(drv_spi_unr); /* avoid panic, should go later */ + clear_unrhdr(drv_spi_unr); + delete_unrhdr(drv_spi_unr); + mtx_destroy(&ipsec_accel_sav_tmp); + mtx_destroy(&ipsec_accel_cnt_lock); +} +SYSUNINIT(ipsec_accel_fini, SI_SUB_VNET_DONE, SI_ORDER_ANY, + ipsec_accel_fini, NULL); + +static void +ipsec_accel_alloc_forget_tq(struct secasvar *sav) +{ + void *ftq; + + if (sav->accel_forget_tq != 0) + return; + + ftq = malloc(sizeof(struct ipsec_accel_forget_tq), M_TEMP, M_WAITOK); + if (!atomic_cmpset_ptr(&sav->accel_forget_tq, 0, (uintptr_t)ftq)) + free(ftq, M_TEMP); +} + +static bool +ipsec_accel_sa_install_match(if_t ifp, void *arg) +{ + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0) + return (false); + if (ifp->if_ipsec_accel_m->if_sa_newkey == NULL) { + printf("driver bug ifp %s if_sa_newkey NULL\n", + if_name(ifp)); + return (false); + } + return (true); +} + +static int +ipsec_accel_sa_newkey_cb(if_t ifp, void *arg) +{ + struct ipsec_accel_install_newkey_tq *tq; + void *priv; + u_int drv_spi; + int error; + + tq = arg; + + printf("ipsec_accel_sa_newkey_act: ifp %s h %p spi %#x " + "flags %#x seq %d\n", + if_name(ifp), ifp->if_ipsec_accel_m->if_sa_newkey, + be32toh(tq->sav->spi), tq->sav->flags, tq->sav->seq); + priv = NULL; + drv_spi = alloc_unr(drv_spi_unr); + if (tq->sav->accel_ifname != NULL && + 
strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) { + error = ipsec_accel_handle_sav(tq->sav, + ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); + goto out; + } + if (drv_spi == -1) { + /* XXXKIB */ + printf("ipsec_accel_sa_install_newkey: cannot alloc " + "drv_spi if %s spi %#x\n", if_name(ifp), + be32toh(tq->sav->spi)); + return (ENOMEM); + } + error = ifp->if_ipsec_accel_m->if_sa_newkey(ifp, tq->sav, + drv_spi, &priv); + if (error != 0) { + if (error == EOPNOTSUPP) { + printf("ipsec_accel_sa_newkey: driver " + "refused sa if %s spi %#x\n", + if_name(ifp), be32toh(tq->sav->spi)); + error = ipsec_accel_handle_sav(tq->sav, + ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); + /* XXXKIB */ + } else { + printf("ipsec_accel_sa_newkey: driver " + "error %d if %s spi %#x\n", + error, if_name(ifp), be32toh(tq->sav->spi)); + /* XXXKIB */ + } + } else { + error = ipsec_accel_handle_sav(tq->sav, ifp, + drv_spi, priv, IFP_HS_HANDLED, NULL); + if (error != 0) { + /* XXXKIB */ + printf("ipsec_accel_sa_newkey: handle_sav " + "err %d if %s spi %#x\n", error, + if_name(ifp), be32toh(tq->sav->spi)); + } + } +out: + return (error); +} + +static void +ipsec_accel_sa_newkey_act(void *context, int pending) +{ + struct ipsec_accel_install_newkey_tq *tq; + void *tqf; + struct secasvar *sav; + + tq = context; + tqf = NULL; + sav = tq->sav; + CURVNET_SET(tq->install_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | + SADB_KEY_ACCEL_DEINST)) == 0 && + sav->state == SADB_SASTATE_MATURE) { + sav->accel_flags |= SADB_KEY_ACCEL_INST; + mtx_unlock(&ipsec_accel_sav_tmp); + if_foreach_sleep(ipsec_accel_sa_install_match, context, + ipsec_accel_sa_newkey_cb, context); + ipsec_accel_alloc_forget_tq(sav); + mtx_lock(&ipsec_accel_sav_tmp); + + /* + * If ipsec_accel_forget_sav() raced with us and set + * the flag, do its work. Its task cannot execute in + * parallel since taskqueue_thread is single-threaded. 
+ */ + if ((sav->accel_flags & SADB_KEY_ACCEL_DEINST) != 0) { + tqf = (void *)sav->accel_forget_tq; + sav->accel_forget_tq = 0; + ipsec_accel_forget_sav_clear(sav); + } + } + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesav(&tq->sav); + CURVNET_RESTORE(); + free(tq, M_TEMP); + free(tqf, M_TEMP); +} + +static void +ipsec_accel_sa_newkey_impl(struct secasvar *sav) +{ + struct ipsec_accel_install_newkey_tq *tq; + + if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | + SADB_KEY_ACCEL_DEINST)) != 0) + return; + + printf( + "ipsec_accel_sa_install_newkey: spi %#x flags %#x seq %d\n", + be32toh(sav->spi), sav->flags, sav->seq); + + tq = malloc(sizeof(*tq), M_TEMP, M_NOWAIT); + if (tq == NULL) { + printf("ipsec_accel_sa_install_newkey: no memory for tq, " + "spi %#x\n", be32toh(sav->spi)); + /* XXXKIB */ + return; + } + + refcount_acquire(&sav->refcnt); + + TASK_INIT(&tq->install_task, 0, ipsec_accel_sa_newkey_act, tq); + tq->sav = sav; + tq->install_vnet = curthread->td_vnet; /* XXXKIB liveness */ + taskqueue_enqueue(taskqueue_thread, &tq->install_task); +} + +static int +ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, + u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires) +{ + struct ifp_handle_sav *ihs, *i; + int error; + + MPASS(__bitcount(flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == 1); + + ihs = malloc(sizeof(*ihs), M_IPSEC_MISC, M_WAITOK | M_ZERO); + ihs->ifp = ifp; + ihs->sav = sav; + ihs->drv_spi = drv_spi; + ihs->ifdata = priv; + ihs->flags = flags; + if ((flags & IFP_HS_OUTPUT) != 0) + ihs->hdr_ext_size = esp_hdrsiz(sav); + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp) { + error = EALREADY; + goto errout; + } + } + error = DRVSPI_SA_PCTRIE_INSERT(&drv_spi_pctrie, ihs); + if (error != 0) + goto errout; + if_ref(ihs->ifp); + CK_LIST_INSERT_HEAD(&sav->accel_ifps, ihs, sav_link); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, ihs, sav_allh_link); + 
mtx_unlock(&ipsec_accel_sav_tmp); + if (ires != NULL) + *ires = ihs; + return (0); +errout: + mtx_unlock(&ipsec_accel_sav_tmp); + free(ihs, M_IPSEC_MISC); + if (ires != NULL) + *ires = NULL; + return (error); +} + +static void +ipsec_accel_forget_handle_sav(struct ifp_handle_sav *i, bool freesav) +{ + struct ifnet *ifp; + struct secasvar *sav; + + mtx_assert(&ipsec_accel_sav_tmp, MA_OWNED); + + CK_LIST_REMOVE(i, sav_link); + CK_LIST_REMOVE(i, sav_allh_link); + DRVSPI_SA_PCTRIE_REMOVE(&drv_spi_pctrie, i->drv_spi); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + ifp = i->ifp; + sav = i->sav; + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + printf("sa deinstall %s %p spi %#x ifl %#x\n", + if_name(ifp), sav, be32toh(sav->spi), i->flags); + ifp->if_ipsec_accel_m->if_sa_deinstall(ifp, + i->drv_spi, i->ifdata); + } + if_rele(ifp); + free_unr(drv_spi_unr, i->drv_spi); + free(i, M_IPSEC_MISC); + if (freesav) + key_freesav(&sav); + mtx_lock(&ipsec_accel_sav_tmp); +} + +static void +ipsec_accel_forget_sav_clear(struct secasvar *sav) +{ + struct ifp_handle_sav *i; + + for (;;) { + i = CK_LIST_FIRST(&sav->accel_ifps); + if (i == NULL) + break; + ipsec_accel_forget_handle_sav(i, false); + } +} + +static void +ipsec_accel_forget_sav_act(void *arg, int pending) +{ + struct ipsec_accel_forget_tq *tq; + struct secasvar *sav; + + tq = arg; + sav = tq->sav; + CURVNET_SET(tq->forget_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + ipsec_accel_forget_sav_clear(sav); + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesav(&sav); + CURVNET_RESTORE(); + free(tq, M_TEMP); +} + +void +ipsec_accel_forget_sav_impl(struct secasvar *sav) +{ + struct ipsec_accel_forget_tq *tq; + + mtx_lock(&ipsec_accel_sav_tmp); + sav->accel_flags |= SADB_KEY_ACCEL_DEINST; + tq = (void *)atomic_load_ptr(&sav->accel_forget_tq); + if (tq == NULL || !atomic_cmpset_ptr(&sav->accel_forget_tq, + (uintptr_t)tq, 0)) { + mtx_unlock(&ipsec_accel_sav_tmp); + return; + } + 
mtx_unlock(&ipsec_accel_sav_tmp); + + refcount_acquire(&sav->refcnt); + TASK_INIT(&tq->forget_task, 0, ipsec_accel_forget_sav_act, tq); + tq->forget_vnet = curthread->td_vnet; + tq->sav = sav; + taskqueue_enqueue(taskqueue_thread, &tq->forget_task); +} + +static void +ipsec_accel_on_ifdown_sav(struct ifnet *ifp) +{ + struct ifp_handle_sav *i, *marker; + + marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO); + marker->flags = IFP_HS_MARKER; + + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, marker, + sav_allh_link); + for (;;) { + i = CK_LIST_NEXT(marker, sav_allh_link); + if (i == NULL) + break; + CK_LIST_REMOVE(marker, sav_allh_link); + CK_LIST_INSERT_AFTER(i, marker, sav_allh_link); + if (i->ifp == ifp) { + refcount_acquire(&i->sav->refcnt); /* XXXKIB wrap ? */ + ipsec_accel_forget_handle_sav(i, true); + } + } + CK_LIST_REMOVE(marker, sav_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + free(marker, M_IPSEC_MISC); +} + +static struct ifp_handle_sav * +ipsec_accel_is_accel_sav_ptr_raw(struct secasvar *sav, struct ifnet *ifp) +{ + struct ifp_handle_sav *i; + + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0) + return (NULL); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp) + return (i); + } + return (NULL); +} + +static struct ifp_handle_sav * +ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, struct ifnet *ifp) +{ + NET_EPOCH_ASSERT(); + return (ipsec_accel_is_accel_sav_ptr_raw(sav, ifp)); +} + +static bool +ipsec_accel_is_accel_sav_impl(struct secasvar *sav) +{ + return (!CK_LIST_EMPTY(&sav->accel_ifps)); +} + +static struct secasvar * +ipsec_accel_drvspi_to_sa(u_int drv_spi) +{ + struct ifp_handle_sav *i; + + i = DRVSPI_SA_PCTRIE_LOOKUP(&drv_spi_pctrie, drv_spi); + if (i == NULL) + return (NULL); + return (i->sav); +} + +static struct ifp_handle_sp * +ipsec_accel_find_accel_sp(struct secpolicy *sp, if_t ifp) +{ + struct ifp_handle_sp *i; + + CK_LIST_FOREACH(i, 
&sp->accel_ifps, sp_link) { + if (i->ifp == ifp) + return (i); + } + return (NULL); +} + +static bool +ipsec_accel_is_accel_sp(struct secpolicy *sp, if_t ifp) +{ + return (ipsec_accel_find_accel_sp(sp, ifp) != NULL); +} + +static int +ipsec_accel_remember_sp(struct secpolicy *sp, if_t ifp, + struct ifp_handle_sp **ip) +{ + struct ifp_handle_sp *i; + + i = malloc(sizeof(*i), M_IPSEC_MISC, M_WAITOK | M_ZERO); + i->sp = sp; + i->ifp = ifp; + if_ref(ifp); + i->flags = IFP_HP_HANDLED; + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&sp->accel_ifps, i, sp_link); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + *ip = i; + return (0); +} + +static bool +ipsec_accel_spdadd_match(if_t ifp, void *arg) +{ + struct secpolicy *sp; + + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0 || + ifp->if_ipsec_accel_m->if_spdadd == NULL) + return (false); + sp = arg; + if (sp->accel_ifname != NULL && + strcmp(sp->accel_ifname, if_name(ifp)) != 0) + return (false); + if (ipsec_accel_is_accel_sp(sp, ifp)) + return (false); + return (true); +} + +static int +ipsec_accel_spdadd_cb(if_t ifp, void *arg) +{ + struct secpolicy *sp; + struct inpcb *inp; + struct ifp_handle_sp *i; + int error; + + sp = arg; + inp = sp->ipsec_accel_add_sp_inp; + printf("ipsec_accel_spdadd_cb: ifp %s m %p sp %p inp %p\n", + if_name(ifp), ifp->if_ipsec_accel_m->if_spdadd, sp, inp); + error = ipsec_accel_remember_sp(sp, ifp, &i); + if (error != 0) { + printf("ipsec_accel_spdadd: %s if_spdadd %p remember res %d\n", + if_name(ifp), sp, error); + return (error); + } + error = ifp->if_ipsec_accel_m->if_spdadd(ifp, sp, inp, &i->ifdata); + if (error != 0) { + i->flags |= IFP_HP_REJECTED; + printf("ipsec_accel_spdadd: %s if_spdadd %p res %d\n", + if_name(ifp), sp, error); + } + return (error); +} + +static void +ipsec_accel_spdadd_act(void *arg, int pending) +{ + struct secpolicy *sp; + struct inpcb *inp; + + sp = arg; + 
CURVNET_SET(sp->accel_add_tq.adddel_vnet); + if_foreach_sleep(ipsec_accel_spdadd_match, arg, + ipsec_accel_spdadd_cb, arg); + inp = sp->ipsec_accel_add_sp_inp; + if (inp != NULL) { + INP_WLOCK(inp); + if (!in_pcbrele_wlocked(inp)) + INP_WUNLOCK(inp); + sp->ipsec_accel_add_sp_inp = NULL; + } + CURVNET_RESTORE(); + key_freesp(&sp); +} + +void +ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp) +{ + struct ipsec_accel_adddel_sp_tq *tq; + + if (sp == NULL) + return; + if (sp->tcount == 0 && inp == NULL) + return; + tq = &sp->accel_add_tq; + if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) + return; + tq->adddel_vnet = curthread->td_vnet; + sp->ipsec_accel_add_sp_inp = inp; + if (inp != NULL) + in_pcbref(inp); + TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spdadd_act, sp); + key_addref(sp); + taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); +} + +static void +ipsec_accel_spddel_act(void *arg, int pending) +{ + struct ifp_handle_sp *i; + struct secpolicy *sp; + int error; + + sp = arg; + CURVNET_SET(sp->accel_del_tq.adddel_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + for (;;) { + i = CK_LIST_FIRST(&sp->accel_ifps); + if (i == NULL) + break; + CK_LIST_REMOVE(i, sp_link); + CK_LIST_REMOVE(i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == + IFP_HP_HANDLED) { + printf("spd deinstall %s %p\n", if_name(i->ifp), sp); + error = i->ifp->if_ipsec_accel_m->if_spddel(i->ifp, + sp, i->ifdata); + if (error != 0) { + printf( + "ipsec_accel_spddel: %s if_spddel %p res %d\n", + if_name(i->ifp), sp, error); + } + } + if_rele(i->ifp); + free(i, M_IPSEC_MISC); + mtx_lock(&ipsec_accel_sav_tmp); + } + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesp(&sp); + CURVNET_RESTORE(); +} + +void +ipsec_accel_spddel_impl(struct secpolicy *sp) +{ + struct ipsec_accel_adddel_sp_tq *tq; + + if (sp == NULL) + return; + + tq = &sp->accel_del_tq; + if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) + 
return; + tq->adddel_vnet = curthread->td_vnet; + TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spddel_act, sp); + key_addref(sp); + taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); +} + +static void +ipsec_accel_on_ifdown_sp(struct ifnet *ifp) +{ + struct ifp_handle_sp *i, *marker; + struct secpolicy *sp; + int error; + + marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO); + marker->flags = IFP_HS_MARKER; + + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, marker, + sp_allh_link); + for (;;) { + i = CK_LIST_NEXT(marker, sp_allh_link); + if (i == NULL) + break; + CK_LIST_REMOVE(marker, sp_allh_link); + CK_LIST_INSERT_AFTER(i, marker, sp_allh_link); + if (i->ifp != ifp) + continue; + + sp = i->sp; + key_addref(sp); + CK_LIST_REMOVE(i, sp_link); + CK_LIST_REMOVE(i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == + IFP_HP_HANDLED) { + printf("spd deinstall %s %p\n", if_name(ifp), sp); + error = ifp->if_ipsec_accel_m->if_spddel(ifp, + sp, i->ifdata); + } *** 1421 LINES SKIPPED ***