Update: Alternate port randomization approaches
Mike Silbersack
silby at silby.com
Wed Dec 29 01:02:24 PST 2004
On Sat, 18 Dec 2004, Mike Silbersack wrote:
> There have been a few reports by users of front end web proxies and other
> systems under FreeBSD that port randomization causes them problems under
> load. This seems to be due to a combination of port randomization and rapid
> connections to the same host causing ports to be recycled before the ISN has
> advanced past the end of the previous connection, thereby causing the
> TIME_WAIT socket on the receiving end to ignore the new SYN.
Based on testing done by Igor Sysoev, I've found that my original patch is
insufficient; even as little as one randomization per second can cause
problems for some users. As a result, I've created the attached patch
(versions for both 6.x and 4.x are included). It implements a relatively
simple algorithm: Port randomization is disabled once the
connection rate goes above 20 connections per second, and it is not
reenabled until the connection rate falls below 20 cps for 5 seconds
straight.
This appears to work for Igor, and it seems safe enough to commit before
4.11-RC2. But, if possible, I'd like a few more sets of eyes to
double-check the concept and code; please take a look at it if you have a
chance.
Thanks,
Mike "Silby" Silbersack
-------------- next part --------------
diff -u -r /usr/src/sys.old/netinet/in_pcb.c /usr/src/sys/netinet/in_pcb.c
--- /usr/src/sys.old/netinet/in_pcb.c Thu Dec 16 03:26:11 2004
+++ /usr/src/sys/netinet/in_pcb.c Sat Dec 25 17:07:56 2004
@@ -62,6 +62,8 @@
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -95,8 +97,12 @@
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
-/* Shall we allocate ephemeral ports in random order? */
-int ipport_randomized = 0;
+/* Variables dealing with random ephemeral port allocation. */
+int ipport_randomized = 1; /* user controlled via sysctl */
+int ipport_randomcps = 20; /* user controlled via sysctl */
+int ipport_stoprandom = 0; /* toggled by ipport_tick */
+int ipport_tcpallocs;
+int ipport_tcplastcount;
#define RANGECHK(var, min, max) \
if ((var) < (min)) { (var) = (min); } \
@@ -136,6 +142,8 @@
&ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
&ipport_randomized, 0, "");
+SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps,
+ CTLFLAG_RW, &ipport_randomcps, 0, "");
/*
* in_pcb.c: manage the Protocol Control Blocks.
@@ -200,6 +208,7 @@
u_short lport = 0;
int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
int error, prison = 0;
+ int dorandom;
if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
return (EADDRNOTAVAIL);
@@ -313,6 +322,20 @@
lastport = &pcbinfo->lastport;
}
/*
+ * For UDP, use random port allocation as long as the user
+ * allows it. For TCP (and as of yet unknown) connections,
+ * use random port allocation only if the user allows it AND
+ * ipport_tick allows it.
+ */
+ if (ipport_randomized &&
+ (!ipport_stoprandom || pcbinfo == &udbinfo))
+ dorandom = 1;
+ else
+ dorandom = 0;
+ /* Make sure to not include UDP packets in the count. */
+ if (pcbinfo != &udbinfo)
+ ipport_tcpallocs++;
+ /*
* Simple check to ensure all ports are not used up causing
* a deadlock here.
*
@@ -323,7 +346,7 @@
/*
* counting down
*/
- if (ipport_randomized)
+ if (dorandom)
*lastport = first -
(arc4random() % (first - last));
count = first - last;
@@ -343,7 +366,7 @@
/*
* counting up
*/
- if (ipport_randomized)
+ if (dorandom)
*lastport = first +
(arc4random() % (last - first));
count = last - first;
@@ -1046,4 +1069,30 @@
if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip)
return (0);
return (1);
+}
+
+/*
+ * ipport_tick runs once per second, determining if random port
+ * allocation should be continued. If more than ipport_randomcps
+ * ports have been allocated in the last second, then we return to
+ * sequential port allocation. We return to random allocation only
+ * once we drop below ipport_randomcps for at least 5 seconds.
+ */
+
+void
+ipport_tick(xtp)
+ void *xtp;
+{
+ if (ipport_tcpallocs > ipport_tcplastcount + ipport_randomcps) {
+ if (ipport_stoprandom == 0)
+ printf("Stopping random allocation\n");
+ ipport_stoprandom = 5;
+ } else {
+ if (ipport_stoprandom == 1)
+ printf("Going back to random allocation\n");
+ if (ipport_stoprandom > 0)
+ ipport_stoprandom--;
+ }
+ ipport_tcplastcount = ipport_tcpallocs;
+ callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
}
diff -u -r /usr/src/sys.old/netinet/in_pcb.h /usr/src/sys/netinet/in_pcb.h
--- /usr/src/sys.old/netinet/in_pcb.h Thu Dec 16 03:26:11 2004
+++ /usr/src/sys/netinet/in_pcb.h Sat Dec 25 17:09:01 2004
@@ -310,6 +310,7 @@
extern int ipport_lastauto;
extern int ipport_hifirstauto;
extern int ipport_hilastauto;
+extern struct callout ipport_tick_callout;
void in_pcbpurgeif0 __P((struct inpcb *, struct ifnet *));
void in_losing __P((struct inpcb *));
@@ -335,6 +336,7 @@
int in_setpeeraddr __P((struct socket *so, struct sockaddr **nam));
int in_setsockaddr __P((struct socket *so, struct sockaddr **nam));
void in_pcbremlists __P((struct inpcb *inp));
+void ipport_tick(void *xtp);
int prison_xinpcb __P((struct proc *p, struct inpcb *inp));
#endif /* _KERNEL */
diff -u -r /usr/src/sys.old/netinet/ip_input.c /usr/src/sys/netinet/ip_input.c
--- /usr/src/sys.old/netinet/ip_input.c Thu Dec 16 03:26:12 2004
+++ /usr/src/sys/netinet/ip_input.c Sat Dec 25 17:16:08 2004
@@ -47,6 +47,8 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/callout.h>
+#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/domain.h>
@@ -183,6 +185,7 @@
(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
static struct ipq ipq[IPREASS_NHASH];
+struct callout ipport_tick_callout;
const int ipintrq_present = 1;
#ifdef IPCTL_DEFMTU
@@ -267,6 +270,12 @@
maxnipq = nmbclusters / 32;
maxfragsperpacket = 16;
+ /* Start ipport_tick. */
+ callout_init(&ipport_tick_callout);
+ ipport_tick(NULL);
+ EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
+ SHUTDOWN_PRI_DEFAULT);
+
#ifndef RANDOM_IP_ID
ip_id = time_second & 0xffff;
#endif
@@ -274,6 +283,13 @@
register_netisr(NETISR_IP, ipintr);
}
+
+void ip_fini(xtp)
+ void *xtp;
+{
+ callout_stop(&ipport_tick_callout);
+}
+
/*
* XXX watch out this one. It is perhaps used as a cache for
diff -u -r /usr/src/sys.old/netinet/ip_var.h /usr/src/sys/netinet/ip_var.h
--- /usr/src/sys.old/netinet/ip_var.h Thu Dec 16 03:26:12 2004
+++ /usr/src/sys/netinet/ip_var.h Sat Dec 25 17:12:12 2004
@@ -160,6 +160,7 @@
int ip_ctloutput(struct socket *, struct sockopt *sopt);
void ip_drain(void);
+void ip_fini(void *xtp);
int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags, int sw_csum);
void ip_freemoptions(struct ip_moptions *);
-------------- next part --------------
diff -u -r /usr/src/sys.old/netinet/in_pcb.c /usr/src/sys/netinet/in_pcb.c
--- /usr/src/sys.old/netinet/in_pcb.c Fri Dec 24 19:45:15 2004
+++ /usr/src/sys/netinet/in_pcb.c Sat Dec 25 13:51:24 2004
@@ -59,6 +59,8 @@
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -97,8 +99,12 @@
int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */
int ipport_reservedlow = 0;
-/* Shall we allocate ephemeral ports in random order? */
-int ipport_randomized = 1;
+/* Variables dealing with random ephemeral port allocation. */
+int ipport_randomized = 1; /* user controlled via sysctl */
+int ipport_randomcps = 20; /* user controlled via sysctl */
+int ipport_stoprandom = 0; /* toggled by ipport_tick */
+int ipport_tcpallocs;
+int ipport_tcplastcount;
#define RANGECHK(var, min, max) \
if ((var) < (min)) { (var) = (min); } \
@@ -143,6 +149,8 @@
CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, "");
SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized,
CTLFLAG_RW, &ipport_randomized, 0, "");
+SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps,
+ CTLFLAG_RW, &ipport_randomcps, 0, "");
/*
* in_pcb.c: manage the Protocol Control Blocks.
@@ -266,6 +274,7 @@
u_short lport = 0;
int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
int error, prison = 0;
+ int dorandom;
INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_LOCK_ASSERT(inp);
@@ -394,6 +403,20 @@
lastport = &pcbinfo->lastport;
}
/*
+ * For UDP, use random port allocation as long as the user
+ * allows it. For TCP (and as of yet unknown) connections,
+ * use random port allocation only if the user allows it AND
+ * ipport_tick allows it.
+ */
+ if (ipport_randomized &&
+ (!ipport_stoprandom || pcbinfo == &udbinfo))
+ dorandom = 1;
+ else
+ dorandom = 0;
+ /* Make sure to not include UDP packets in the count. */
+ if (pcbinfo != &udbinfo)
+ ipport_tcpallocs++;
+ /*
* Simple check to ensure all ports are not used up causing
* a deadlock here.
*
@@ -404,7 +427,7 @@
/*
* counting down
*/
- if (ipport_randomized)
+ if (dorandom)
*lastport = first -
(arc4random() % (first - last));
count = first - last;
@@ -422,7 +445,7 @@
/*
* counting up
*/
- if (ipport_randomized)
+ if (dorandom)
*lastport = first +
(arc4random() % (last - first));
count = last - first;
@@ -1180,4 +1203,30 @@
SOCK_UNLOCK(so);
INP_UNLOCK(inp);
#endif
+}
+
+/*
+ * ipport_tick runs once per second, determining if random port
+ * allocation should be continued. If more than ipport_randomcps
+ * ports have been allocated in the last second, then we return to
+ * sequential port allocation. We return to random allocation only
+ * once we drop below ipport_randomcps for at least 5 seconds.
+ */
+
+void
+ipport_tick(xtp)
+ void *xtp;
+{
+ if (ipport_tcpallocs > ipport_tcplastcount + ipport_randomcps) {
+ if (ipport_stoprandom == 0)
+ printf("Stopping random allocation\n");
+ ipport_stoprandom = 5;
+ } else {
+ if (ipport_stoprandom == 1)
+ printf("Going back to random allocation\n");
+ if (ipport_stoprandom > 0)
+ ipport_stoprandom--;
+ }
+ ipport_tcplastcount = ipport_tcpallocs;
+ callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
}
diff -u -r /usr/src/sys.old/netinet/in_pcb.h /usr/src/sys/netinet/in_pcb.h
--- /usr/src/sys.old/netinet/in_pcb.h Fri Dec 24 19:45:15 2004
+++ /usr/src/sys/netinet/in_pcb.h Fri Dec 24 20:02:14 2004
@@ -333,6 +333,7 @@
extern int ipport_lastauto;
extern int ipport_hifirstauto;
extern int ipport_hilastauto;
+extern struct callout ipport_tick_callout;
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *, const char *);
@@ -362,6 +363,7 @@
in_sockaddr(in_port_t port, struct in_addr *addr);
void in_pcbsosetlabel(struct socket *so);
void in_pcbremlists(struct inpcb *inp);
+void ipport_tick(void *xtp);
#endif /* _KERNEL */
#endif /* !_NETINET_IN_PCB_H_ */
diff -u -r /usr/src/sys.old/netinet/ip_input.c /usr/src/sys/netinet/ip_input.c
--- /usr/src/sys.old/netinet/ip_input.c Fri Dec 24 19:45:15 2004
+++ /usr/src/sys/netinet/ip_input.c Sat Dec 25 13:37:51 2004
@@ -38,6 +38,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/callout.h>
#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
@@ -186,6 +187,7 @@
static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
struct mtx ipqlock;
+struct callout ipport_tick_callout;
#define IPQ_LOCK() mtx_lock(&ipqlock)
#define IPQ_UNLOCK() mtx_unlock(&ipqlock)
@@ -279,11 +281,23 @@
maxnipq = nmbclusters / 32;
maxfragsperpacket = 16;
+ /* Start ipport_tick. */
+ callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
+ ipport_tick(NULL);
+ EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
+ SHUTDOWN_PRI_DEFAULT);
+
/* Initialize various other remaining things. */
ip_id = time_second & 0xffff;
ipintrq.ifq_maxlen = ipqmaxlen;
mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
+}
+
+void ip_fini(xtp)
+ void *xtp;
+{
+ callout_stop(&ipport_tick_callout);
}
/*
Only in /usr/src/sys/netinet: ip_input.c.orig
diff -u -r /usr/src/sys.old/netinet/ip_var.h /usr/src/sys/netinet/ip_var.h
--- /usr/src/sys.old/netinet/ip_var.h Fri Dec 24 19:45:15 2004
+++ /usr/src/sys/netinet/ip_var.h Sat Dec 25 13:29:54 2004
@@ -159,6 +159,7 @@
int ip_ctloutput(struct socket *, struct sockopt *sopt);
void ip_drain(void);
+void ip_fini(void *xtp);
int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags, int sw_csum);
void ip_freemoptions(struct ip_moptions *);
More information about the freebsd-net
mailing list