[Bug 281990] offset of sa_family in sockaddr_ib inconsistent with sockaddr

From: <bugzilla-noreply_at_freebsd.org>
Date: Thu, 10 Oct 2024 13:30:25 UTC
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=281990

            Bug ID: 281990
           Summary: offset of sa_family in sockaddr_ib inconsistent with
                    sockaddr
           Product: Base System
           Version: 14.1-RELEASE
          Hardware: amd64
                OS: Any
            Status: New
          Severity: Affects Some People
          Priority: ---
         Component: bin
          Assignee: bugs@FreeBSD.org
          Reporter: bmueller@panasas.com

My system has a RoCE-enabled Broadcom NIC that uses the bnxt_re driver.  To
test whether libfabric can see the device, I ran 'fi_info -p verbs' which trips
an assert in the libfabric library.

(gdb) where
#0  0x00000008007d810a in thr_kill () from /lib/libc.so.7
#1  0x0000000800751404 in raise () from /lib/libc.so.7
#2  0x00000008008049d9 in abort () from /lib/libc.so.7
#3  0x00000008007345f1 in __assert () from /lib/libc.so.7
#4  0x0000000800502253 in ofi_addr_set_port (addr=0x800e27510, port=0) at
./include/ofi_net.h:832
#5  0x000000080050557e in vrb_alloc_ib_addrinfo (port_num=1 '\001',
gid=0x7fffffffe3f0, pkey=65535) at prov/verbs/src/verbs_info.c:1045
#6  0x00000008005057b4 in vrb_get_sib (verbs_devs=0x8005f4380 <verbs_devs>) at
prov/verbs/src/verbs_info.c:1096
#7  0x0000000800506380 in vrb_init_info (all_infos=0x8005f3e08
<vrb_util_prov+8>) at prov/verbs/src/verbs_info.c:1400
#8  0x0000000800507c99 in vrb_getinfo (version=65555, node=0x0, service=0x0,
flags=0, hints=0x800e1b000, info=0x7fffffffe640) at
prov/verbs/src/verbs_info.c:1892
#9  0x000000080045fa2f in fi_getinfo_ (version=65555, node=0x0, service=0x0,
flags=0, hints=0x800e1b000, info=0x7fffffffe6b0) at src/fabric.c:1279
#10 0x0000000000401e22 in run (hints=0x800e1b000, node=0x0, port=0x0, flags=0)
at util/info.c:323
#11 0x000000000040227d in main (argc=3, argv=0x7fffffffe790) at util/info.c:447
(gdb) frame 5
#5  0x000000080050557e in vrb_alloc_ib_addrinfo (port_num=1 '\001',
gid=0x7fffffffe3f0, pkey=65535) at prov/verbs/src/verbs_info.c:1045
1045            ofi_addr_set_port((struct sockaddr *)sib, 0);
(gdb) x/16xb sib
0x800e27510:    0x1b    0x00    0xff    0xff    0x00    0x00    0x00    0x00
0x800e27518:    0xfe    0x80    0x00    0x00    0x00    0x00    0x00    0x00

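The code below casts a sockaddr_ib to a sockaddr so that it can select the
appropriate branch of the switch statement based on the value of sa_family.
This only works if the family field is at the same offset in both structures.
On FreeBSD, struct sockaddr begins with a one-byte sa_len, so sa_family is at
offset 1, whereas sockaddr_ib places its two-byte sib_family at offset 0.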

vrb_alloc_ib_addrinfo(...)
{
  struct sockaddr_ib *sib;
  ...
  ofi_addr_set_port((struct sockaddr *)sib, 0);
}

static inline void ofi_addr_set_port(struct sockaddr *addr, uint16_t port)
{
    struct ofi_sockaddr_ib *sib;

    switch (ofi_sa_family(addr)) {
    case AF_INET:
        ofi_sin_port(addr) = htons(port);
        break;
    case AF_INET6:
        ofi_sin6_port(addr) = htons(port);
        break;
    case AF_IB:
        sib = (struct ofi_sockaddr_ib *)addr;
        sib->sib_sid = htonll(((uint64_t)OFI_RDMA_PS_IB << 16) + ntohs(port));
        sib->sib_sid_mask = htonll(OFI_IB_IP_PS_MASK | OFI_IB_IP_PORT_MASK);
        break;
    default:
        FI_WARN(&core_prov, FI_LOG_FABRIC, "Unknown address format\n");
        assert(0);
    }
}

#define ofi_sa_family(addr) ((struct sockaddr *)(addr))->sa_family

To correct the issue, the leading fields of sockaddr_ib should be changed to
match the layout of sockaddr:

Old:
struct sockaddr_ib {
  unsigned short int  sib_family; /* AF_IB */
  ...
}

New:
struct sockaddr_ib {
  unsigned char   sib_len;
  sa_family_t     sib_family; /* AF_IB */
  ...
}

The change will need to be made in two locations:
sys/ofed/include/rdma/ib.h
contrib/ofed/librdmacm/ib.h

-- 
You are receiving this mail because:
You are the assignee for the bug.