Update: Alternate port randomization approaches

Mike Silbersack silby at silby.com
Wed Dec 29 01:02:24 PST 2004



On Sat, 18 Dec 2004, Mike Silbersack wrote:

> There have been a few reports by users of front end web proxies and other 
> systems under FreeBSD that port randomization causes them problems under 
> load.  This seems to be due to a combination of port randomization and rapid 
> connections to the same host causing ports to be recycled before the ISN has 
> advanced past the end of the previous connection, thereby causing the 
> TIME_WAIT socket on the receiving end to ignore the new SYN.

Based on testing done by Igor Sysoev, I've found that my original patch is 
insufficient; even as little as one randomization per second can cause 
problems for some users.  As a result, I've created the attached patch 
(versions for both 6.x and 4.x are included).  It implements a relatively 
simple algorithm:  Port randomization is disabled once the 
connection rate goes above 20 connections per second, and it is not 
reenabled until the connection rate falls below 20 cps for 5 seconds 
straight.

This appears to work for Igor, and it seems safe enough to commit before 
4.11-RC2.  But, if possible, I'd like a few more sets of eyes to 
doublecheck the concept and code; please take a look at it if you have a 
chance.

Thanks,

Mike "Silby" Silbersack
-------------- next part --------------
diff -u -r /usr/src/sys.old/netinet/in_pcb.c /usr/src/sys/netinet/in_pcb.c
--- /usr/src/sys.old/netinet/in_pcb.c	Thu Dec 16 03:26:11 2004
+++ /usr/src/sys/netinet/in_pcb.c	Sat Dec 25 17:07:56 2004
@@ -62,6 +62,8 @@
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
@@ -95,8 +97,12 @@
 int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
 int	ipport_hilastauto  = IPPORT_HILASTAUTO;		/* 65535 */
 
-/* Shall we allocate ephemeral ports in random order? */
-int	ipport_randomized = 0;
+/* Variables dealing with random ephemeral port allocation. */
+int	ipport_randomized = 1;	/* user controlled via sysctl */
+int	ipport_randomcps = 20;	/* user controlled via sysctl */
+int	ipport_stoprandom = 0;	/* toggled by ipport_tick */
+int	ipport_tcpallocs;
+int	ipport_tcplastcount;
 
 #define RANGECHK(var, min, max) \
 	if ((var) < (min)) { (var) = (min); } \
@@ -136,6 +142,8 @@
 	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
 	   &ipport_randomized, 0, "");
+SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps,
+          CTLFLAG_RW, &ipport_randomcps, 0, "");
 
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
@@ -200,6 +208,7 @@
 	u_short lport = 0;
 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
 	int error, prison = 0;
+	int dorandom;
 
 	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
 		return (EADDRNOTAVAIL);
@@ -313,6 +322,20 @@
 			lastport = &pcbinfo->lastport;
 		}
 		/*
+		 * For UDP, use random port allocation as long as the user
+		 * allows it.  For TCP (and as of yet unknown) connections,
+		 * use random port allocation only if the user allows it AND
+		 * ipport_tick allows it.
+		 */
+		if (ipport_randomized &&
+			(!ipport_stoprandom || pcbinfo == &udbinfo))
+			dorandom = 1;
+		else
+			dorandom = 0;
+		/* Make sure to not include UDP port allocations in the count. */
+		if (pcbinfo != &udbinfo)
+			ipport_tcpallocs++;
+		/*
 		 * Simple check to ensure all ports are not used up causing
 		 * a deadlock here.
 		 *
@@ -323,7 +346,7 @@
 			/*
 			 * counting down
 			 */
-			if (ipport_randomized)
+			if (dorandom)
 				*lastport = first -
 					    (arc4random() % (first - last));
 			count = first - last;
@@ -343,7 +366,7 @@
 			/*
 			 * counting up
 			 */
-			if (ipport_randomized)
+			if (dorandom)
 				*lastport = first +
 					    (arc4random() % (last - first));
 			count = last - first;
@@ -1046,4 +1069,30 @@
 	if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip)
 		return (0);
 	return (1);
+}
+
+/*
+ * ipport_tick runs once per second, determining if random port
+ * allocation should be continued.  If more than ipport_randomcps
+ * ports have been allocated in the last second, then we return to
+ * sequential port allocation. We return to random allocation only
+ * once we drop below ipport_randomcps for at least 5 seconds.
+ */
+
+void
+ipport_tick(xtp)
+	void *xtp;
+{
+	if (ipport_tcpallocs > ipport_tcplastcount + ipport_randomcps) {
+		if (ipport_stoprandom == 0)
+			printf("Stopping random allocation\n");
+		ipport_stoprandom = 5;
+	} else {
+		if (ipport_stoprandom == 1)
+			printf("Going back to random allocation\n");
+		if (ipport_stoprandom > 0)
+			ipport_stoprandom--;
+	}
+	ipport_tcplastcount = ipport_tcpallocs;
+	callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
 }
diff -u -r /usr/src/sys.old/netinet/in_pcb.h /usr/src/sys/netinet/in_pcb.h
--- /usr/src/sys.old/netinet/in_pcb.h	Thu Dec 16 03:26:11 2004
+++ /usr/src/sys/netinet/in_pcb.h	Sat Dec 25 17:09:01 2004
@@ -310,6 +310,7 @@
 extern int	ipport_lastauto;
 extern int	ipport_hifirstauto;
 extern int	ipport_hilastauto;
+extern struct callout ipport_tick_callout;
 
 void	in_pcbpurgeif0 __P((struct inpcb *, struct ifnet *));
 void	in_losing __P((struct inpcb *));
@@ -335,6 +336,7 @@
 int	in_setpeeraddr __P((struct socket *so, struct sockaddr **nam));
 int	in_setsockaddr __P((struct socket *so, struct sockaddr **nam));
 void	in_pcbremlists __P((struct inpcb *inp));
+void	ipport_tick(void *xtp);
 int	prison_xinpcb __P((struct proc *p, struct inpcb *inp));
 #endif /* _KERNEL */
 
diff -u -r /usr/src/sys.old/netinet/ip_input.c /usr/src/sys/netinet/ip_input.c
--- /usr/src/sys.old/netinet/ip_input.c	Thu Dec 16 03:26:12 2004
+++ /usr/src/sys/netinet/ip_input.c	Sat Dec 25 17:16:08 2004
@@ -47,6 +47,8 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/callout.h>
+#include <sys/eventhandler.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/domain.h>
@@ -183,6 +185,7 @@
 	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
 
 static struct ipq ipq[IPREASS_NHASH];
+struct callout ipport_tick_callout;
 const  int    ipintrq_present = 1;
 
 #ifdef IPCTL_DEFMTU
@@ -267,6 +270,12 @@
 	maxnipq = nmbclusters / 32;
 	maxfragsperpacket = 16;
 
+	/* Start ipport_tick. */
+	callout_init(&ipport_tick_callout);
+	ipport_tick(NULL);
+	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
+		SHUTDOWN_PRI_DEFAULT);
+
 #ifndef RANDOM_IP_ID
 	ip_id = time_second & 0xffff;
 #endif
@@ -274,6 +283,13 @@
 
 	register_netisr(NETISR_IP, ipintr);
 }
+
+void ip_fini(xtp)
+	void *xtp;
+{
+	callout_stop(&ipport_tick_callout);
+}
+
 
 /*
  * XXX watch out this one. It is perhaps used as a cache for
diff -u -r /usr/src/sys.old/netinet/ip_var.h /usr/src/sys/netinet/ip_var.h
--- /usr/src/sys.old/netinet/ip_var.h	Thu Dec 16 03:26:12 2004
+++ /usr/src/sys/netinet/ip_var.h	Sat Dec 25 17:12:12 2004
@@ -160,6 +160,7 @@
 
 int	 ip_ctloutput(struct socket *, struct sockopt *sopt);
 void	 ip_drain(void);
+void	 ip_fini(void *xtp);
 int	 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
 	    u_long if_hwassist_flags, int sw_csum);
 void	 ip_freemoptions(struct ip_moptions *);
-------------- next part --------------
diff -u -r /usr/src/sys.old/netinet/in_pcb.c /usr/src/sys/netinet/in_pcb.c
--- /usr/src/sys.old/netinet/in_pcb.c	Fri Dec 24 19:45:15 2004
+++ /usr/src/sys/netinet/in_pcb.c	Sat Dec 25 13:51:24 2004
@@ -59,6 +59,8 @@
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
@@ -97,8 +99,12 @@
 int	ipport_reservedhigh = IPPORT_RESERVED - 1;	/* 1023 */
 int	ipport_reservedlow = 0;
 
-/* Shall we allocate ephemeral ports in random order? */
-int	ipport_randomized = 1;
+/* Variables dealing with random ephemeral port allocation. */
+int	ipport_randomized = 1;	/* user controlled via sysctl */
+int	ipport_randomcps = 20;	/* user controlled via sysctl */
+int	ipport_stoprandom = 0;	/* toggled by ipport_tick */
+int	ipport_tcpallocs;
+int	ipport_tcplastcount;
 
 #define RANGECHK(var, min, max) \
 	if ((var) < (min)) { (var) = (min); } \
@@ -143,6 +149,8 @@
 	   CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized,
 	   CTLFLAG_RW, &ipport_randomized, 0, "");
+SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps,
+	   CTLFLAG_RW, &ipport_randomcps, 0, "");
 
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
@@ -266,6 +274,7 @@
 	u_short lport = 0;
 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
 	int error, prison = 0;
+	int dorandom;
 
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	INP_LOCK_ASSERT(inp);
@@ -394,6 +403,20 @@
 			lastport = &pcbinfo->lastport;
 		}
 		/*
+		 * For UDP, use random port allocation as long as the user
+		 * allows it.  For TCP (and as of yet unknown) connections,
+		 * use random port allocation only if the user allows it AND
+		 * ipport_tick allows it.
+		 */
+		if (ipport_randomized &&
+			(!ipport_stoprandom || pcbinfo == &udbinfo))
+			dorandom = 1;
+		else
+			dorandom = 0;
+		/* Make sure to not include UDP port allocations in the count. */
+		if (pcbinfo != &udbinfo)
+			ipport_tcpallocs++;
+		/*
 		 * Simple check to ensure all ports are not used up causing
 		 * a deadlock here.
 		 *
@@ -404,7 +427,7 @@
 			/*
 			 * counting down
 			 */
-			if (ipport_randomized)
+			if (dorandom)
 				*lastport = first -
 					    (arc4random() % (first - last));
 			count = first - last;
@@ -422,7 +445,7 @@
 			/*
 			 * counting up
 			 */
-			if (ipport_randomized)
+			if (dorandom)
 				*lastport = first +
 					    (arc4random() % (last - first));
 			count = last - first;
@@ -1180,4 +1203,30 @@
 	SOCK_UNLOCK(so);
 	INP_UNLOCK(inp);
 #endif
+}
+
+/*
+ * ipport_tick runs once per second, determining if random port
+ * allocation should be continued.  If more than ipport_randomcps
+ * ports have been allocated in the last second, then we return to
+ * sequential port allocation. We return to random allocation only
+ * once we drop below ipport_randomcps for at least 5 seconds.
+ */
+
+void
+ipport_tick(xtp)
+	void *xtp;
+{
+	if (ipport_tcpallocs > ipport_tcplastcount + ipport_randomcps) {
+		if (ipport_stoprandom == 0)
+			printf("Stopping random allocation\n");
+		ipport_stoprandom = 5;
+	} else {
+		if (ipport_stoprandom == 1)
+			printf("Going back to random allocation\n");
+		if (ipport_stoprandom > 0)
+			ipport_stoprandom--;
+	}
+	ipport_tcplastcount = ipport_tcpallocs;
+	callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
 }
diff -u -r /usr/src/sys.old/netinet/in_pcb.h /usr/src/sys/netinet/in_pcb.h
--- /usr/src/sys.old/netinet/in_pcb.h	Fri Dec 24 19:45:15 2004
+++ /usr/src/sys/netinet/in_pcb.h	Fri Dec 24 20:02:14 2004
@@ -333,6 +333,7 @@
 extern int	ipport_lastauto;
 extern int	ipport_hifirstauto;
 extern int	ipport_hilastauto;
+extern struct callout ipport_tick_callout;
 
 void	in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
 int	in_pcballoc(struct socket *, struct inpcbinfo *, const char *);
@@ -362,6 +363,7 @@
 	in_sockaddr(in_port_t port, struct in_addr *addr);
 void	in_pcbsosetlabel(struct socket *so);
 void	in_pcbremlists(struct inpcb *inp);
+void	ipport_tick(void *xtp);
 #endif /* _KERNEL */
 
 #endif /* !_NETINET_IN_PCB_H_ */
diff -u -r /usr/src/sys.old/netinet/ip_input.c /usr/src/sys/netinet/ip_input.c
--- /usr/src/sys.old/netinet/ip_input.c	Fri Dec 24 19:45:15 2004
+++ /usr/src/sys/netinet/ip_input.c	Sat Dec 25 13:37:51 2004
@@ -38,6 +38,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/callout.h>
 #include <sys/mac.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
@@ -186,6 +187,7 @@
 
 static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
 struct mtx ipqlock;
+struct callout ipport_tick_callout;
 
 #define	IPQ_LOCK()	mtx_lock(&ipqlock)
 #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
@@ -279,11 +281,23 @@
 	maxnipq = nmbclusters / 32;
 	maxfragsperpacket = 16;
 
+	/* Start ipport_tick. */
+	callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
+	ipport_tick(NULL);
+	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
+		SHUTDOWN_PRI_DEFAULT);
+
 	/* Initialize various other remaining things. */
 	ip_id = time_second & 0xffff;
 	ipintrq.ifq_maxlen = ipqmaxlen;
 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
 	netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
+}
+
+void ip_fini(xtp)
+	void *xtp;
+{
+	callout_stop(&ipport_tick_callout);
 }
 
 /*
Only in /usr/src/sys/netinet: ip_input.c.orig
diff -u -r /usr/src/sys.old/netinet/ip_var.h /usr/src/sys/netinet/ip_var.h
--- /usr/src/sys.old/netinet/ip_var.h	Fri Dec 24 19:45:15 2004
+++ /usr/src/sys/netinet/ip_var.h	Sat Dec 25 13:29:54 2004
@@ -159,6 +159,7 @@
 
 int	 ip_ctloutput(struct socket *, struct sockopt *sopt);
 void	 ip_drain(void);
+void	 ip_fini(void *xtp);
 int	 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
 	    u_long if_hwassist_flags, int sw_csum);
 void	 ip_freemoptions(struct ip_moptions *);


More information about the freebsd-net mailing list