Re: git: e1882428dcbb - main - ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif
- Reply: Gleb Smirnoff : "Re: git: e1882428dcbb - main - ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif"
- Reply: Gleb Smirnoff : "Re: git: e1882428dcbb - main - ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif"
- In reply to: Gleb Smirnoff : "git: e1882428dcbb - main - ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 27 Jan 2022 16:59:03 UTC
On 1/27/22, Gleb Smirnoff <glebius@freebsd.org> wrote: > The branch main has been updated by glebius: > > URL: > https://cgit.FreeBSD.org/src/commit/?id=e1882428dcbbafd2814d7e17b977a8f686784b39 > > commit e1882428dcbbafd2814d7e17b977a8f686784b39 > Author: Gleb Smirnoff <glebius@FreeBSD.org> > AuthorDate: 2022-01-27 05:58:50 +0000 > Commit: Gleb Smirnoff <glebius@FreeBSD.org> > CommitDate: 2022-01-27 05:58:50 +0000 > > ifnet/mbuf: provide KPI to serialize/restore m->m_pkthdr.rcvif > > Supplement ifindex table with generation count and use it to > serialize & restore an ifnet pointer. > This commit or something from the series reliably causes my box to crash on boot: Fatal trap 12: page fault while in kernel mode cpuid = 7; apic id = 07 fault virtual address = 0x54 fault code = supervisor read data, page not present instruction pointer = 0x20:0xffffffff80616d98 stack pointer = 0x28:0xfffffe06462889e0 frame pointer = 0x28:0xfffffe06462889e0 code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, long 1, def32 0, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 1855 (route) trap number = 12 panic: page fault cpuid = 7 time = 1643302734 KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe06462887a0 vpanic() at vpanic+0x17f/frame 0xfffffe06462887f0 panic() at panic+0x43/frame 0xfffffe0646288850 trap_fatal() at trap_fatal+0x385/frame 0xfffffe06462888b0 trap_pfault() at trap_pfault+0x4f/frame 0xfffffe0646288910 calltrap() at calltrap+0x8/frame 0xfffffe0646288910 --- trap 0xc, rip = 0xffffffff80616d98, rsp = 0xfffffe06462889e0, rbp = 0xfffffe06462889e0 --- m_rcvif_serialize() at m_rcvif_serialize+0x8/frame 0xfffffe06462889e0 netisr_queue_src() at netisr_queue_src+0x15c/frame 0xfffffe0646288a30 route_output() at route_output+0x890/frame 0xfffffe0646288c70 sosend_generic() at sosend_generic+0x456/frame 0xfffffe0646288d10 soo_write() at soo_write+0x43/frame 0xfffffe0646288d40 dofilewrite() at dofilewrite+0x81/frame 0xfffffe0646288d90 sys_write() at sys_write+0xbc/frame 0xfffffe0646288e00 amd64_syscall() at amd64_syscall+0x107/frame 0xfffffe0646288f30 fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe0646288f30 --- syscall (4, FreeBSD ELF64, sys_write), rip = 0x3b000ba6703a, rsp = 0x3b0009378e48, rbp = 0x3b00093796f0 --- KDB: enter: panic [ thread pid 1855 tid 102265 ] Stopped at kdb_enter+0x37: movq $0,0x9cc17e(%rip) > Reviewed by: kp > Differential revision: https://reviews.freebsd.org/D33266 > Fun note: git show e6abef09187a > --- > sys/kern/kern_mbuf.c | 22 ++++++++++++++++++++++ > sys/net/if.c | 49 > ++++++++++++++++++++++++++++++++++++------------- > sys/net/if_var.h | 9 ++++++++- > sys/sys/mbuf.h | 6 ++++++ > 4 files changed, 72 insertions(+), 14 deletions(-) > > diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c > index f1e76ef00c65..5c69f663c0e2 100644 > --- a/sys/kern/kern_mbuf.c > +++ b/sys/kern/kern_mbuf.c > @@ -1635,6 +1635,28 @@ m_snd_tag_destroy(struct m_snd_tag *mst) > counter_u64_add(snd_tag_count, -1); > } > > +void > +m_rcvif_serialize(struct mbuf *m) > +{ > + u_short idx, gen; > + > + M_ASSERTPKTHDR(m); > + idx = m->m_pkthdr.rcvif->if_index; > + gen = m->m_pkthdr.rcvif->if_idxgen; > + m->m_pkthdr.rcvidx = idx; > + m->m_pkthdr.rcvgen = gen; > +} > + > +struct ifnet * > +m_rcvif_restore(struct mbuf *m) > +{ > + > + M_ASSERTPKTHDR(m); > + > + return ((m->m_pkthdr.rcvif = ifnet_byindexgen(m->m_pkthdr.rcvidx, > + m->m_pkthdr.rcvgen))); > +} > + > /* > * Allocate an mbuf with anonymous external pages. > */ > diff --git a/sys/net/if.c b/sys/net/if.c > index f148ae8c9c6d..e8d65e64518a 100644 > --- a/sys/net/if.c > +++ b/sys/net/if.c > @@ -313,7 +313,10 @@ VNET_DEFINE(struct ifgrouphead, ifg_head); > /* Table of ifnet by index. */ > static int if_index; > static int if_indexlim = 8; > -static struct ifnet **ifindex_table; > +static struct ifindex_entry { > + struct ifnet *ife_ifnet; > + uint16_t ife_gencnt; > +} *ifindex_table; > > SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, > CTLFLAG_RW | CTLFLAG_MPSAFE, 0, > @@ -325,8 +328,8 @@ sysctl_ifcount(SYSCTL_HANDLER_ARGS) > > IFNET_RLOCK(); > for (int i = 1; i <= if_index; i++) > - if (ifindex_table[i] != NULL && > - ifindex_table[i]->if_vnet == curvnet) > + if (ifindex_table[i].ife_ifnet != NULL && > + ifindex_table[i].ife_ifnet->if_vnet == curvnet) > rv = i; > IFNET_RUNLOCK(); > > @@ -370,7 +373,7 @@ ifnet_byindex(u_int idx) > if (__predict_false(idx > if_index)) > return (NULL); > > - ifp = ck_pr_load_ptr(&ifindex_table[idx]); > + ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet); > > if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet) > ifp = NULL; > @@ -391,6 +394,24 @@ ifnet_byindex_ref(u_int idx) > return (ifp); > } > > +struct ifnet * > +ifnet_byindexgen(uint16_t idx, uint16_t gen) > +{ > + struct ifnet *ifp; > + > + NET_EPOCH_ASSERT(); > + > + if (__predict_false(idx > if_index)) > + return (NULL); > + > + ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet); > + > + if (ifindex_table[idx].ife_gencnt == gen) > + return (ifp); > + else > + return (NULL); > +} > + > struct ifaddr * > ifaddr_byindex(u_short idx) > { > @@ -571,13 +592,13 @@ if_alloc_domain(u_char type, int numa_domain) > * next slot. > */ > for (idx = 1; idx <= if_index; idx++) { > - if (ifindex_table[idx] == NULL) > + if (ifindex_table[idx].ife_ifnet == NULL) > break; > } > > /* Catch if_index overflow. */ > if (idx >= if_indexlim) { > - struct ifnet **new, **old; > + struct ifindex_entry *new, *old; > int newlim; > > newlim = if_indexlim * 2; > @@ -593,7 +614,8 @@ if_alloc_domain(u_char type, int numa_domain) > if_index = idx; > > ifp->if_index = idx; > - ck_pr_store_ptr(&ifindex_table[idx], ifp); > + ifp->if_idxgen = ifindex_table[idx].ife_gencnt; > + ck_pr_store_ptr(&ifindex_table[idx].ife_ifnet, ifp); > IFNET_WUNLOCK(); > > return (ifp); > @@ -668,9 +690,10 @@ if_free(struct ifnet *ifp) > * virtualized and interface would outlive the vnet. > */ > IFNET_WLOCK(); > - MPASS(ifindex_table[ifp->if_index] == ifp); > - ck_pr_store_ptr(&ifindex_table[ifp->if_index], NULL); > - while (if_index > 0 && ifindex_table[if_index] == NULL) > + MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); > + ck_pr_store_ptr(&ifindex_table[ifp->if_index].ife_ifnet, NULL); > + ifindex_table[ifp->if_index].ife_gencnt++; > + while (if_index > 0 && ifindex_table[if_index].ife_ifnet == NULL) > if_index--; > IFNET_WUNLOCK(); > > @@ -819,7 +842,7 @@ if_attach_internal(struct ifnet *ifp, bool vmove) > struct sockaddr_dl *sdl; > struct ifaddr *ifa; > > - MPASS(ifindex_table[ifp->if_index] == ifp); > + MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); > > #ifdef VIMAGE > ifp->if_vnet = curvnet; > @@ -4508,8 +4531,8 @@ if_show_ifnet(struct ifnet *ifp) > IF_DB_PRINTF("%d", if_dunit); > IF_DB_PRINTF("%s", if_description); > IF_DB_PRINTF("%u", if_index); > + IF_DB_PRINTF("%d", if_idxgen); > IF_DB_PRINTF("%u", if_refcount); > - IF_DB_PRINTF("%d", if_index_reserved); > IF_DB_PRINTF("%p", if_softc); > IF_DB_PRINTF("%p", if_l2com); > IF_DB_PRINTF("%p", if_llsoftc); > @@ -4564,7 +4587,7 @@ DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets) > u_short idx; > > for (idx = 1; idx <= if_index; idx++) { > - ifp = ifindex_table[idx]; > + ifp = ifindex_table[idx].ife_ifnet; > if (ifp == NULL) > continue; > db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); > diff --git a/sys/net/if_var.h b/sys/net/if_var.h > index dedc73718125..21b3687f62c1 100644 > --- a/sys/net/if_var.h > +++ b/sys/net/if_var.h > @@ -334,7 +334,7 @@ struct ifnet { > const char *if_dname; /* driver name */ > int if_dunit; /* unit or IF_DUNIT_NONE */ > u_short if_index; /* numeric abbreviation for this if */ > - short if_index_reserved; /* spare space to grow if_index */ > + u_short if_idxgen; /* ... and its generation count */ > char if_xname[IFNAMSIZ]; /* external name (name + unit) */ > char *if_description; /* interface description */ > > @@ -644,6 +644,13 @@ extern struct sx ifnet_sxlock; > struct ifnet *ifnet_byindex(u_int); > struct ifnet *ifnet_byindex_ref(u_int); > > +/* > + * ifnet_byindexgen() looks up ifnet by index and generation count, > + * attempting to restore a weak pointer that had been stored across > + * the epoch. > + */ > +struct ifnet *ifnet_byindexgen(uint16_t idx, uint16_t gen); > + > /* > * Given the index, ifaddr_byindex() returns the one and only > * link-level ifaddr for the interface. You are not supposed to use > diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h > index 77364f428b12..ebe8ef205055 100644 > --- a/sys/sys/mbuf.h > +++ b/sys/sys/mbuf.h > @@ -159,6 +159,10 @@ struct pkthdr { > union { > struct m_snd_tag *snd_tag; /* send tag, if any */ > struct ifnet *rcvif; /* rcv interface */ > + struct { > + uint16_t rcvidx; /* rcv interface index ... */ > + uint16_t rcvgen; /* ... and generation count */ > + }; > }; > SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ > int32_t len; /* total packet length */ > @@ -862,6 +866,8 @@ int m_snd_tag_alloc(struct ifnet *, > void m_snd_tag_init(struct m_snd_tag *, struct ifnet *, > const struct if_snd_tag_sw *); > void m_snd_tag_destroy(struct m_snd_tag *); > +void m_rcvif_serialize(struct mbuf *); > +struct ifnet *m_rcvif_restore(struct mbuf *); > > static __inline int > m_gettype(int size) > -- Mateusz Guzik <mjguzik gmail.com>