Network stack profiling/optimisation
Slawa Olhovchenkov
slw at zxy.spb.ru
Fri Aug 26 15:17:32 UTC 2016
I am collecting some data related to network stack performance under heavy load.
This data was collected on a dual E5-2620 system under a 20Gbit load.
At peak network traffic (more than 25K connections, about 20Gbit
total traffic), half of the cores are fully utilised by the network stack.
This is a flame graph from one core: http://zxy.spb.ru/cpu10.svg
This is the same graph, but with the stack cut off at ixgbe_rxeof for a more
unified view of the TCP/IP stack: http://zxy.spb.ru/cpu10u.svg
The top 3 most used lines are:
7036 0xffffffff804bf02d atomic_cmpset_long /usr/obj/usr/src/sys/VSTREAM/./machine/atomic.h:163
static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{
u_char res;
__asm __volatile(
" " MPLOCKED " "
> " cmpxchgq %3,%1 ; "
" sete %0 ; "
"# atomic_cmpset_long"
: "=q" (res), /* 0 */
"+m" (*dst), /* 1 */
"+a" (expect) /* 2 */
: "r" (src) /* 3 */
: "memory", "cc");
return (res);
}
6099 0xffffffff81171963 ?? ??:0
0xffffffff81171940 <ixgbe_rxeof+1168>: mov 0x10(%r15),%rax
0xffffffff81171944 <ixgbe_rxeof+1172>: add $0x8,%rax
0xffffffff81171948 <ixgbe_rxeof+1176>: mov -0x4c(%rbp),%ecx
0xffffffff8117194b <ixgbe_rxeof+1179>: test %cx,%cx
0xffffffff8117194e <ixgbe_rxeof+1182>: mov %rax,0x10(%r15)
0xffffffff81171952 <ixgbe_rxeof+1186>: je 0xffffffff8117198d <ixgbe_rxeof+1245>
0xffffffff81171954 <ixgbe_rxeof+1188>: mov 0x10(%rdi),%rcx
0xffffffff81171958 <ixgbe_rxeof+1192>: mov -0x4c(%rbp),%edx
0xffffffff8117195b <ixgbe_rxeof+1195>: nopl 0x0(%rax,%rax,1)
0xffffffff81171960 <ixgbe_rxeof+1200>: mov (%rcx),%rsi
0xffffffff81171963 <ixgbe_rxeof+1203>: mov %rsi,(%rax)
0xffffffff81171966 <ixgbe_rxeof+1206>: mov 0x8(%rcx),%rsi
0xffffffff8117196a <ixgbe_rxeof+1210>: mov %rsi,0x8(%rax)
0xffffffff8117196e <ixgbe_rxeof+1214>: mov 0x10(%rcx),%rsi
0xffffffff81171972 <ixgbe_rxeof+1218>: mov %rsi,0x10(%rax)
0xffffffff81171976 <ixgbe_rxeof+1222>: mov 0x18(%rcx),%rsi
0xffffffff8117197a <ixgbe_rxeof+1226>: mov %rsi,0x18(%rax)
0xffffffff8117197e <ixgbe_rxeof+1230>: add $0xffffffffffffffe0,%edx
0xffffffff81171981 <ixgbe_rxeof+1233>: add $0x20,%rcx
0xffffffff81171985 <ixgbe_rxeof+1237>: add $0x20,%rax
0xffffffff81171989 <ixgbe_rxeof+1241>: test %edx,%edx
5594 0xffffffff8053395a mb_free_ext /usr/src/sys/kern/uipc_mbuf.c:301
if (*(m->m_ext.ref_cnt) == 1 ||
I am able to collect and process more measurements to help
improve the FreeBSD network stack.
Does anyone have any ideas about this?
I don't see any evident and simple points of optimisation :(
More information about the freebsd-net
mailing list