netisr deferred - active only one thread
Alexander V. Chernikov
melifaro at FreeBSD.org
Thu Feb 2 15:51:27 UTC 2012
On 02.02.2012 12:59, Коньков Евгений wrote:
> Здравствуйте, Andrey.
>
> Вы писали 2 февраля 2012 г., 8:35:23:
>
> AZ> On 02.02.2012 5:11, Alexander V. Chernikov wrote:
>>> On 01.02.2012 20:45, Andrey Zonov wrote:
>>>> Hi,
>>>>
>>>> I'm trying to tune machine with 8.2-STABLE for heavy network load and
>>>> now playing with netisr. Could anyone explain me why actually works only
>>>> one netisr thread if I set them to 8?
>>>
>>> Can you please supply `netstat -Q` output and clarify your usage pattern ?
>>> (I mean, this is router/web server/some kind of traffic receiver/etc..).
>>> For example, flow policy does not balance traffic from single flow
>>> between different CPUs.
>>>
>
> AZ> This is a web server with multiple nginx instances. 5k/sec accepted
> AZ> connections. Input packet rate is 35kpps, output - 25kpps.
>
> AZ> I thought of changing policy for IP, but how can I do this (without
> AZ> patching)? Is it safe?
>
> AZ> netstat -Q (I turned on direct& direct force for now):
> AZ> Configuration:
> AZ> Setting Value Maximum
> AZ> Thread count 8 8
> AZ> Default queue limit 256 10240
> AZ> Direct dispatch enabled n/a
> AZ> Forced direct dispatch enabled n/a
> AZ> Threads bound to CPUs enabled n/a
>
> AZ> Protocols:
> AZ> Name Proto QLimit Policy Flags
> AZ> ip 1 5000 flow ---
> AZ> igmp 2 256 source ---
> AZ> rtsock 3 256 source ---
> AZ> arp 7 256 source ---
> AZ> ip6 10 256 flow ---
>
> AZ> Workstreams:
> AZ> WSID CPU Name Len WMark Disp'd HDisp'd QDrops Queued Handled
> AZ> 0 0 ip 0 0 1125716 0 0 0 1125716
> AZ> igmp 0 0 0 0 0 0
> AZ> rtsock 0 1 0 0 0 102 102
> AZ> arp 0 0 27 0 0 0 27
> AZ> ip6 0 0 0 0 0 0
> AZ> 1 1 ip 0 0 1222701 0 0 0 1222701
> AZ> igmp 0 0 0 0 0 0
> AZ> rtsock 0 0 0 0 0 0
> AZ> arp 0 0 46 0 0 0 46
> AZ> ip6 0 0 0 0 0 0
> AZ> 2 2 ip 0 0 1184381 0 0 0 1184381
> AZ> igmp 0 0 0 0 0 0
> AZ> rtsock 0 0 0 0 0 0
> AZ> arp 0 0 45 0 0 0 45
> AZ> ip6 0 0 0 0 0 0
> AZ> 3 3 ip 0 0 1191094 0 0 0 1191094
> AZ> igmp 0 0 0 0 0 0
> AZ> rtsock 0 0 0 0 0 0
> AZ> arp 0 0 54 0 0 0 54
> AZ> ip6 0 0 0 0 0 0
> AZ> 4 4 ip 0 0 846165 0 0 0 846165
> AZ> igmp 0 0 0 0 0 0
> AZ> rtsock 0 0 0 0 0 0
> AZ> arp 0 0 19 0 0 0 19
> AZ> ip6 0 0 0 0 0 0
> AZ> 5 5 ip 0 0 849478 0 0 0 849478
> AZ> igmp 0 0 0 0 0 0
> AZ> rtsock 0 0 0 0 0 0
> AZ> arp 0 0 27 0 0 0 27
> AZ> ip6 0 0 0 0 0 0
> AZ> 6 6 ip 0 0 870836 0 0 0 870836
> AZ> igmp 0 0 0 0 0 0
> AZ> rtsock 0 0 0 0 0 0
> AZ> arp 0 0 29 0 0 0 29
> AZ> ip6 0 0 0 0 0 0
> AZ> 7 7 ip 0 5000 594320 5 910862 3453459 4047784
> AZ> igmp 0 0 0 0 0 0
> AZ> rtsock 0 0 0 0 0 0
> AZ> arp 0 5 21 0 0 109 130
> AZ> ip6 0 1 0 0 0 1
>
> Same problem here. Is it because one netisr thread is at 100% CPU, so the
> other threads stop doing useful work? Or does the packet scheduler have an
> unbalanced policy, so that it keeps scheduling packets to netisr:7 even
> though that thread is already 100% busy?
Can you please try an attached patch?
Rebuild kernel with this patch and set net.isr.dispatch to deferred / hybrid
P.S. it is also reasonable to set net.isr.bindthreads to 1
>
>
>
--
WBR, Alexander
-------------- next part --------------
Index: sys/netinet/ip_input.c
===================================================================
--- sys/netinet/ip_input.c (revision 230910)
+++ sys/netinet/ip_input.c (working copy)
@@ -78,6 +78,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_ipsec.h>
#endif /* IPSEC */
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/sctp.h>
+#include <libkern/jenkins.h>
+
#include <sys/socketvar.h>
#include <security/mac/mac_framework.h>
@@ -145,9 +150,13 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_inte
VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */
+static VNET_DEFINE(uint32_t, flow_hashjitter);
+#define V_flow_hashjitter VNET(flow_hashjitter)
+static struct mbuf * ip_hash_mbuf(struct mbuf *m, uintptr_t source);
static struct netisr_handler ip_nh = {
.nh_name = "ip",
.nh_handler = ip_input,
+ .nh_m2flow = ip_hash_mbuf,
.nh_proto = NETISR_IP,
.nh_policy = NETISR_POLICY_FLOW,
};
@@ -305,6 +314,9 @@ ip_init(void)
NULL, UMA_ALIGN_PTR, 0);
maxnipq_update();
+ if (V_flow_hashjitter == 0)
+ V_flow_hashjitter = arc4random();
+
/* Initialize packet filter hooks. */
V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
V_inet_pfil_hook.ph_af = AF_INET;
@@ -390,6 +402,73 @@ ip_fini(void *xtp)
callout_stop(&ipport_tick_callout);
}
+static struct mbuf *
+ip_hash_mbuf(struct mbuf *m, uintptr_t source)
+{
+ struct ip *ip;
+ uint8_t proto;
+ int iphlen, offset;
+ uint32_t key[3];
+ struct tcphdr *th;
+ struct udphdr *uh;
+ struct sctphdr *sh;
+ uint16_t sport = 0, dport = 0;
+ uint32_t flowid, pullup_len = 0;
+
+#define M_CHECK(length) do { \
+ pullup_len += length; \
+ if ((m)->m_pkthdr.len < (pullup_len)) \
+ return (m); \
+ if ((m)->m_len < (pullup_len) && \
+ (((m) = m_pullup((m),(pullup_len))) == NULL)) \
+ return NULL; \
+} while (0)
+
+ M_CHECK(sizeof(struct ip));
+ ip = mtod(m, struct ip *);
+
+ proto = ip->ip_p;
+ iphlen = ip->ip_hl << 2; /* XXX options? */
+
+ key[0] = 0;
+ key[1] = ip->ip_src.s_addr;
+ key[2] = ip->ip_dst.s_addr;
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ M_CHECK(sizeof(struct tcphdr));
+ th = (struct tcphdr *)((caddr_t)ip + iphlen);
+ sport = th->th_sport;
+ dport = th->th_dport;
+ break;
+ case IPPROTO_UDP:
+ M_CHECK(sizeof(struct udphdr));
+ uh = (struct udphdr *)((caddr_t)ip + iphlen);
+ sport = uh->uh_sport;
+ dport = uh->uh_dport;
+ break;
+ case IPPROTO_SCTP:
+ M_CHECK(sizeof(struct sctphdr));
+ sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+ sport = sh->src_port;
+ dport = sh->dest_port;
+ break;
+ }
+
+ if (sport > 0) {
+ ((uint16_t *)key)[0] = sport;
+ ((uint16_t *)key)[1] = dport;
+ offset = 0;
+ } else
+ offset = V_flow_hashjitter + proto;
+
+ flowid = jenkins_hashword(key, 3, offset);
+ m->m_flags |= M_FLOWID;
+ m->m_pkthdr.flowid = flowid;
+
+ return m;
+}
+
/*
* Ip input routine. Checksum and byte swap header. If fragmented
* try to reassemble. Process options. Pass to next level.
More information about the freebsd-net
mailing list