svn commit: r185051 - in user/lstewart/dummynet_8.x: sbin/ipfw sys sys/kern sys/modules sys/modules/alq sys/netinet sys/sys

Lawrence Stewart lstewart at FreeBSD.org
Tue Nov 18 05:36:02 PST 2008


Author: lstewart
Date: Tue Nov 18 13:36:01 2008
New Revision: 185051
URL: http://svn.freebsd.org/changeset/base/185051

Log:
  - Merge in my alq varlen patch for use by my dummynet logging mods
  - Fix up the ipfw man page change from my previous DPD commit (pointed out by
    brueffer@)
  - Add first pass attempt at detailed logging to dummynet based on a patch I use
    in house at work. Needs more polishing, but should be functional as is
    (haven't really tested this version of the patch at all yet).

Added:
  user/lstewart/dummynet_8.x/sys/modules/alq/
     - copied from r184954, user/lstewart/alq_varlen_8.x/sys/modules/alq/
Modified:
  user/lstewart/dummynet_8.x/sbin/ipfw/ipfw.8
  user/lstewart/dummynet_8.x/sys/   (props changed)
  user/lstewart/dummynet_8.x/sys/kern/kern_alq.c
  user/lstewart/dummynet_8.x/sys/modules/Makefile
  user/lstewart/dummynet_8.x/sys/netinet/ip_dummynet.c
  user/lstewart/dummynet_8.x/sys/netinet/ip_dummynet.h
  user/lstewart/dummynet_8.x/sys/sys/alq.h

Modified: user/lstewart/dummynet_8.x/sbin/ipfw/ipfw.8
==============================================================================
--- user/lstewart/dummynet_8.x/sbin/ipfw/ipfw.8	Tue Nov 18 13:24:38 2008	(r185050)
+++ user/lstewart/dummynet_8.x/sbin/ipfw/ipfw.8	Tue Nov 18 13:36:01 2008	(r185051)
@@ -1994,12 +1994,13 @@ Packet loss set.
 Argument
 .Ar packet-loss-set
 is a comma-delimited string of the form 10,30-31,1000 identifying the specific
-packets entering a queue/pipe to drop. In the given example, the 10th, 30th,
-31st and 1000th packet to enter the pipe/queue would be dropped. Clearing the
-counters on a pipe will cause the
+packets entering a queue/pipe to drop.
+In the given example, the 10th, 30th, 31st and 1000th packet to enter the
+pipe/queue would be dropped.
+Clearing the counters on a pipe will cause the
 .Ar packet-loss-set
-to be evaluated again from scratch. Use of this option mutually excludes use of
-the
+to be evaluated again from scratch.
+Use of this option mutually excludes use of the
 .Nm plr
 option.
 .Pp

Modified: user/lstewart/dummynet_8.x/sys/kern/kern_alq.c
==============================================================================
--- user/lstewart/dummynet_8.x/sys/kern/kern_alq.c	Tue Nov 18 13:24:38 2008	(r185050)
+++ user/lstewart/dummynet_8.x/sys/kern/kern_alq.c	Tue Nov 18 13:36:01 2008	(r185051)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2002, Jeffrey Roberson <jeff at freebsd.org>
+ * Copyright (c) 2008, Lawrence Stewart <lstewart at freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -51,14 +52,18 @@ __FBSDID("$FreeBSD$");
 struct alq {
 	int	aq_entmax;		/* Max entries */
 	int	aq_entlen;		/* Entry length */
+	int	aq_freebytes;		/* Bytes available in buffer */
+	int	aq_buflen;		/* Total length of our buffer */
 	char	*aq_entbuf;		/* Buffer for stored entries */
+	int	aq_writehead;
+	int	aq_writetail;
 	int	aq_flags;		/* Queue flags */
 	struct mtx	aq_mtx;		/* Queue lock */
 	struct vnode	*aq_vp;		/* Open vnode handle */
 	struct ucred	*aq_cred;	/* Credentials of the opening thread */
-	struct ale	*aq_first;	/* First ent */
-	struct ale	*aq_entfree;	/* First free ent */
-	struct ale	*aq_entvalid;	/* First ent valid for writing */
+	//struct ale	*aq_first;	/* First ent */
+	//struct ale	*aq_entfree;	/* First free ent */
+	//struct ale	*aq_entvalid;	/* First ent valid for writing */
 	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
 	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
 };
@@ -182,8 +187,14 @@ ald_daemon(void)
 	ALD_LOCK();
 
 	for (;;) {
-		while ((alq = LIST_FIRST(&ald_active)) == NULL)
-			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
+		while ((alq = LIST_FIRST(&ald_active)) == NULL
+				&& !ald_shutingdown)
+			mtx_sleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
+
+		if (ald_shutingdown) {
+			ALD_UNLOCK();
+			break;
+		}
 
 		ALQ_LOCK(alq);
 		ald_deactivate(alq);
@@ -191,9 +202,11 @@ ald_daemon(void)
 		needwakeup = alq_doio(alq);
 		ALQ_UNLOCK(alq);
 		if (needwakeup)
-			wakeup(alq);
+			wakeup_one(alq);
 		ALD_LOCK();
 	}
+
+	kthread_exit();
 }
 
 static void
@@ -204,6 +217,12 @@ ald_shutdown(void *arg, int howto)
 	ALD_LOCK();
 	ald_shutingdown = 1;
 
+	/* wake ald_daemon so that it exits */
+	wakeup(&ald_active);
+
+	/* wait for ald_daemon to exit */
+	mtx_sleep(ald_thread, &ald_mtx, PWAIT, "aldslp", 0);
+
 	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
 		LIST_REMOVE(alq, aq_link);
 		ALD_UNLOCK();
@@ -244,41 +263,45 @@ alq_doio(struct alq *alq)
 	struct vnode *vp;
 	struct uio auio;
 	struct iovec aiov[2];
-	struct ale *ale;
-	struct ale *alstart;
 	int totlen;
 	int iov;
 	int vfslocked;
 
+	KASSERT(alq->aq_freebytes != alq->aq_buflen,
+		("%s: queue emtpy!", __func__)
+	);
+
 	vp = alq->aq_vp;
 	td = curthread;
 	totlen = 0;
 	iov = 0;
 
-	alstart = ale = alq->aq_entvalid;
-	alq->aq_entvalid = NULL;
-
 	bzero(&aiov, sizeof(aiov));
 	bzero(&auio, sizeof(auio));
 
-	do {
-		if (aiov[iov].iov_base == NULL)
-			aiov[iov].iov_base = ale->ae_data;
-		aiov[iov].iov_len += alq->aq_entlen;
-		totlen += alq->aq_entlen;
-		/* Check to see if we're wrapping the buffer */
-		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
-			iov++;
-		ale->ae_flags &= ~AE_VALID;
-		ale = ale->ae_next;
-	} while (ale->ae_flags & AE_VALID);
+	/* start the write from the location of our buffer tail pointer */
+	aiov[iov].iov_base = alq->aq_entbuf + alq->aq_writetail;
+
+	if (alq->aq_writetail < alq->aq_writehead) {
+		/* buffer not wrapped */
+		totlen = aiov[iov].iov_len =  alq->aq_writehead -
+							alq->aq_writetail;
+	} else {
+		/*
+		 * buffer wrapped, requires 2 aiov entries:
+		 * - first is from writetail to end of buffer
+		 * - second is from start of buffer to writehead
+		 */
+		aiov[iov].iov_len =  alq->aq_buflen - alq->aq_writetail;
+		iov++;
+		aiov[iov].iov_base = alq->aq_entbuf;
+		aiov[iov].iov_len =  alq->aq_writehead;
+		totlen = aiov[0].iov_len + aiov[1].iov_len;
+	}
 
 	alq->aq_flags |= AQ_FLUSHING;
 	ALQ_UNLOCK(alq);
 
-	if (iov == 2 || aiov[iov].iov_base == NULL)
-		iov--;
-
 	auio.uio_iov = &aiov[0];
 	auio.uio_offset = 0;
 	auio.uio_segflg = UIO_SYSSPACE;
@@ -308,8 +331,17 @@ alq_doio(struct alq *alq)
 	ALQ_LOCK(alq);
 	alq->aq_flags &= ~AQ_FLUSHING;
 
-	if (alq->aq_entfree == NULL)
-		alq->aq_entfree = alstart;
+	/* Adjust writetail as required, taking into account wrapping */
+	alq->aq_writetail += (iov == 2) ? aiov[1].iov_len : totlen;
+	alq->aq_freebytes += totlen;
+
+	/*
+	 * If we just flushed the buffer completely,
+	 * reset indexes to 0 to minimise buffer wraps
+	 * This is also required to ensure alq_getn() can't wedge itself
+	 */
+	if (alq->aq_freebytes == alq->aq_buflen)
+		alq->aq_writehead = alq->aq_writetail = 0;
 
 	if (alq->aq_flags & AQ_WANTED) {
 		alq->aq_flags &= ~AQ_WANTED;
@@ -340,13 +372,13 @@ alq_open(struct alq **alqp, const char *
 {
 	struct thread *td;
 	struct nameidata nd;
-	struct ale *ale;
-	struct ale *alp;
 	struct alq *alq;
-	char *bufp;
 	int flags;
 	int error;
-	int i, vfslocked;
+	int vfslocked;
+
+	KASSERT(size > 0, ("%s: size <= 0", __func__));
+	KASSERT(count >= 0, ("%s: count < 0", __func__));
 
 	*alqp = NULL;
 	td = curthread;
@@ -365,31 +397,27 @@ alq_open(struct alq **alqp, const char *
 	VFS_UNLOCK_GIANT(vfslocked);
 
 	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
-	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
-	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
 	alq->aq_vp = nd.ni_vp;
 	alq->aq_cred = crhold(cred);
-	alq->aq_entmax = count;
-	alq->aq_entlen = size;
-	alq->aq_entfree = alq->aq_first;
 
 	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
 
-	bufp = alq->aq_entbuf;
-	ale = alq->aq_first;
-	alp = NULL;
-
-	/* Match up entries with buffers */
-	for (i = 0; i < count; i++) {
-		if (alp)
-			alp->ae_next = ale;
-		ale->ae_data = bufp;
-		alp = ale;
-		ale++;
-		bufp += size;
+	if (count > 0) {
+		/* fixed length messages */
+		alq->aq_buflen = size * count;
+		alq->aq_entmax = count;
+		alq->aq_entlen = size;
+	} else {
+		/* variable length messages */
+		alq->aq_buflen = size;
+		alq->aq_entmax = 0;
+		alq->aq_entlen = 0;
 	}
 
-	alp->ae_next = alq->aq_first;
+	alq->aq_freebytes = alq->aq_buflen;	
+	alq->aq_entbuf = malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO);
+
+	alq->aq_writehead = alq->aq_writetail = 0;
 
 	if ((error = ald_add(alq)) != 0)
 		return (error);
@@ -403,46 +431,180 @@ alq_open(struct alq **alqp, const char *
  * wait or return an error depending on the value of waitok.
  */
 int
-alq_write(struct alq *alq, void *data, int waitok)
+alq_write(struct alq *alq, void *data, int flags)
 {
-	struct ale *ale;
+	/* should only be called in fixed length message (legacy) mode */
+	KASSERT(alq->aq_entmax > 0 && alq->aq_entlen > 0,
+		("%s: fixed length write on variable length queue", __func__)
+	);
+	return (alq_writen(alq, data, alq->aq_entlen, flags));
+}
+
+int
+alq_writen(struct alq *alq, void *data, int len, int flags)
+{
+	int activate = 0;
+	int copy = len;
+
+	KASSERT(len > 0 && len < alq->aq_buflen,
+		("%s: len <= 0 || len > alq->aq_buflen", __func__)
+	);
+
+	ALQ_LOCK(alq);
+
+	/*
+	 * If the message is larger than our underlying buffer or
+	 * there is not enough free space in our underlying buffer
+	 * to accept the message and the user can't wait, return
+	 */
+	if ((len > alq->aq_buflen) ||
+		((flags & ALQ_NOWAIT) && (alq->aq_freebytes < len))) {
+		ALQ_UNLOCK(alq);
+		return (EWOULDBLOCK);
+	}
+
+	/*
+	 * ALQ_WAITOK or alq->aq_freebytes > len,
+	 * either spin until we have enough free bytes (former) or skip (latter)
+	 */
+	while (alq->aq_freebytes < len && (alq->aq_flags & AQ_SHUTDOWN) == 0) {
+		alq->aq_flags |= AQ_WANTED;
+		msleep_spin(alq, &alq->aq_mtx, "alqwriten", 0);
+	}
 
-	if ((ale = alq_get(alq, waitok)) == NULL)
+	/*
+	 * we need to serialise wakeups to ensure records remain in order...
+	 * therefore, wakeup the next thread in the queue waiting for
+	 * alq resources to be available
+	 * (technically this is only required if we actually entered the above
+	 * while loop)
+	 */
+	wakeup_one(alq);
+
+	/* bail if we're shutting down */
+	if (alq->aq_flags & AQ_SHUTDOWN) {
+		ALQ_UNLOCK(alq);
 		return (EWOULDBLOCK);
+	}
+
+	/*
+	 * if we need to wrap the buffer to accommodate the write,
+	 * we'll need 2 calls to bcopy
+	 */
+	if ((alq->aq_buflen - alq->aq_writehead) < len)
+		copy = alq->aq_buflen - alq->aq_writehead;
+
+	/* copy (part of) message to the buffer */
+	bcopy(data, alq->aq_entbuf + alq->aq_writehead, copy);
+	alq->aq_writehead += copy;
+
+	if (copy != len) {
+		/*
+		 * wrap the buffer by copying the remainder of our message
+		 * to the start of the buffer and resetting the head ptr
+		 */
+		bcopy(data, alq->aq_entbuf, len - copy);
+		alq->aq_writehead = copy;
+	}
 
-	bcopy(data, ale->ae_data, alq->aq_entlen);
-	alq_post(alq, ale);
+	alq->aq_freebytes -= len;
+
+	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
+		alq->aq_flags |= AQ_ACTIVE;
+		activate = 1;
+	}
+
+	ALQ_UNLOCK(alq);
+
+	if (activate) {
+		ALD_LOCK();
+		ald_activate(alq);
+		ALD_UNLOCK();
+	}
 
 	return (0);
 }
 
 struct ale *
-alq_get(struct alq *alq, int waitok)
+alq_get(struct alq *alq, int flags)
+{
+	/* should only be called in fixed length message (legacy) mode */
+	KASSERT(alq->aq_entmax > 0 && alq->aq_entlen > 0,
+		("%s: fixed length get on variable length queue", __func__)
+	);
+	return (alq_getn(alq, alq->aq_entlen, flags));
+}
+
+struct ale *
+alq_getn(struct alq *alq, int len, int flags)
 {
 	struct ale *ale;
-	struct ale *aln;
+	int contigbytes;
 
-	ale = NULL;
+	ale = malloc(	sizeof(struct ale),
+			M_ALD,
+			(flags & ALQ_NOWAIT) ? M_NOWAIT : M_WAITOK
+	);
+
+	if (ale == NULL)
+		return (NULL);
 
 	ALQ_LOCK(alq);
 
-	/* Loop until we get an entry or we're shutting down */
-	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 && 
-	    (ale = alq->aq_entfree) == NULL &&
-	    (waitok & ALQ_WAITOK)) {
-		alq->aq_flags |= AQ_WANTED;
-		msleep_spin(alq, &alq->aq_mtx, "alqget", 0);
+	/* determine the number of free contiguous bytes */
+	if (alq->aq_writehead <= alq->aq_writetail)
+		contigbytes = alq->aq_freebytes;
+	else
+		contigbytes = alq->aq_buflen - alq->aq_writehead;
+
+	/*
+	 * If the message is larger than our underlying buffer or
+	 * there is not enough free contiguous space in our underlying buffer
+	 * to accept the message and the user can't wait, return
+	 */
+	if ((len > alq->aq_buflen) ||
+		((flags & ALQ_NOWAIT) && (contigbytes < len))) {
+		ALQ_UNLOCK(alq);
+		return (NULL);
 	}
 
-	if (ale != NULL) {
-		aln = ale->ae_next;
-		if ((aln->ae_flags & AE_VALID) == 0)
-			alq->aq_entfree = aln;
+	/*
+	 * ALQ_WAITOK or contigbytes > len,
+	 * either spin until we have enough free contiguous bytes (former)
+	 * or skip (latter)
+	 */
+	while (contigbytes < len && (alq->aq_flags & AQ_SHUTDOWN) == 0) {
+		alq->aq_flags |= AQ_WANTED;
+		msleep_spin(alq, &alq->aq_mtx, "alqgetn", 0);
+		if (alq->aq_writehead <= alq->aq_writetail)
+			contigbytes = alq->aq_freebytes;
 		else
-			alq->aq_entfree = NULL;
-	} else
+			contigbytes = alq->aq_buflen - alq->aq_writehead;
+	}
+
+	/*
+	 * we need to serialise wakeups to ensure records remain in order...
+	 * therefore, wakeup the next thread in the queue waiting for
+	 * alq resources to be available
+	 * (technically this is only required if we actually entered the above
+	 * while loop)
+	 */
+	wakeup_one(alq);
+
+	/* bail if we're shutting down */
+	if (alq->aq_flags & AQ_SHUTDOWN) {
 		ALQ_UNLOCK(alq);
+		return (NULL);
+	}
 
+	/*
+	 * If we are here, we have a contiguous number of bytes >= len
+	 * available in our buffer starting at aq_writehead.
+	 */
+	ale->ae_data = alq->aq_entbuf + alq->aq_writehead;
+	ale->ae_datalen = len;
+	alq->aq_writehead += len;
+	alq->aq_freebytes -= len;
 
 	return (ale);
 }
@@ -452,11 +614,6 @@ alq_post(struct alq *alq, struct ale *al
 {
 	int activate;
 
-	ale->ae_flags |= AE_VALID;
-
-	if (alq->aq_entvalid == NULL)
-		alq->aq_entvalid = ale;
-
 	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
 		alq->aq_flags |= AQ_ACTIVE;
 		activate = 1;
@@ -464,11 +621,14 @@ alq_post(struct alq *alq, struct ale *al
 		activate = 0;
 
 	ALQ_UNLOCK(alq);
+
 	if (activate) {
 		ALD_LOCK();
 		ald_activate(alq);
 		ALD_UNLOCK();
 	}
+
+	free(ale, M_ALD);
 }
 
 void
@@ -487,7 +647,7 @@ alq_flush(struct alq *alq)
 	ALQ_UNLOCK(alq);
 
 	if (needwakeup)
-		wakeup(alq);
+		wakeup_one(alq);
 }
 
 /*
@@ -509,7 +669,49 @@ alq_close(struct alq *alq)
 	alq_shutdown(alq);
 
 	mtx_destroy(&alq->aq_mtx);
-	free(alq->aq_first, M_ALD);
 	free(alq->aq_entbuf, M_ALD);
 	free(alq, M_ALD);
 }
+
+static int alq_load_handler(module_t mod, int what, void *arg)
+{
+	int ret = 0;
+
+	switch(what) {
+		case MOD_LOAD:
+		case MOD_UNLOAD:
+		case MOD_SHUTDOWN:
+			break;
+		
+		case MOD_QUIESCE:
+			ALD_LOCK();
+			/* only allow unload if there are no open queues */
+			if (LIST_FIRST(&ald_queues) == NULL) {
+				ald_shutingdown = 1;
+				ALD_UNLOCK();
+				ald_shutdown(NULL, 0);
+				mtx_destroy(&ald_mtx);
+			} else {
+				ALD_UNLOCK();
+				ret = EBUSY;
+			}
+			break;
+		
+		default:
+			ret = EINVAL;
+			break;
+	}
+
+	return (ret);
+}
+
+/* basic module data */
+static moduledata_t alq_mod =
+{
+	"alq",
+	alq_load_handler, /* execution entry point for the module */
+	NULL
+};
+
+DECLARE_MODULE(alq, alq_mod, SI_SUB_SMP, SI_ORDER_ANY);
+MODULE_VERSION(alq, 1);

Modified: user/lstewart/dummynet_8.x/sys/modules/Makefile
==============================================================================
--- user/lstewart/dummynet_8.x/sys/modules/Makefile	Tue Nov 18 13:24:38 2008	(r185050)
+++ user/lstewart/dummynet_8.x/sys/modules/Makefile	Tue Nov 18 13:36:01 2008	(r185051)
@@ -17,6 +17,7 @@ SUBDIR=	${_3dfx} \
 	${_aic} \
 	aic7xxx \
 	aio \
+	alq \
 	${_amd} \
 	ale \
 	amr \

Modified: user/lstewart/dummynet_8.x/sys/netinet/ip_dummynet.c
==============================================================================
--- user/lstewart/dummynet_8.x/sys/netinet/ip_dummynet.c	Tue Nov 18 13:24:38 2008	(r185050)
+++ user/lstewart/dummynet_8.x/sys/netinet/ip_dummynet.c	Tue Nov 18 13:36:01 2008	(r185051)
@@ -64,28 +64,39 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/priv.h>
+#include <sys/kthread.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
+#include <sys/alq.h>
+#include <sys/sbuf.h>
+#include <sys/hash.h>
+#include <sys/unistd.h>
+
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
 
 #include <netinet/if_ether.h> /* for struct arpcom */
 
 #include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
 #include <netinet6/ip6_var.h>
 
+#include <machine/in_cksum.h>
+
 /*
  * We keep a private variable for the simulation time, but we could
  * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
@@ -154,6 +165,73 @@ static struct callout dn_timeout;
 
 extern	void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
+#define DN_LOG(fs, p, q, m, dropped, dir) \
+	if (dn_log_enable) \
+		dn_log((fs), (p), (q), (m), (dropped), (dir));
+
+#define CAST_PTR_INT(X) (*((int*)(X)))
+
+struct log_node {
+	/* log msg creation timestamp */
+	struct timeval	tval;
+	/*
+	 * direction of packet after dummynet finishes processing it
+	 * (defined in ip_dummynet.h DN_TO_IP_OUT, DN_TO_IP_IN, ...)
+	 */
+	int	direction;
+	/*
+	 * pkt dropped yes/no + reason if dropped (see DN_DROP_X defines in
+	 * ip_dummynet.h)
+	 */
+	uint32_t	dropped;
+	/* hash of the pkt which triggered the log msg */
+	uint32_t	hash;
+	/* IP version log_node relates to; either INP_IPV4 or INP_IPV6 */
+	uint8_t		ipver;
+	/* flow set number */
+	int	fs_num;
+	/* flags set on the flow set */
+	uint16_t	fs_flags;
+	/* pipe number */
+	int	p_num;
+	/* current pipe occupancy */
+	int	p_len;
+	/*
+	 * max queue len in either pkts or bytes (depending on whether
+	 * DN_QSIZE_IS_BYTES is set in fs_flags)
+	 */
+	int	q_max_len;
+	/* current queue occupancy in pkts */
+	int	q_len_pkts;
+	/* current queue occupancy in bytes */
+	int	q_len_bytes;
+
+	STAILQ_ENTRY(log_node) nodes;
+};
+
+/*
+ * in_pcb.h defines INP_IPV4 as 0x1 and INP_IPV6 as 0x2,
+ * which we use as an index into this array
+ */
+static char	ipver[3] = {'\0', '4', '6'};
+static int	dn_sysctl_log_enable_handler(SYSCTL_HANDLER_ARGS);
+static int	dn_sysctl_logfile_name_handler(SYSCTL_HANDLER_ARGS);
+static u_int	dn_log_enable = 0;
+static char	dn_logfile[PATH_MAX] = "/var/log/dummynet.log\0";
+STAILQ_HEAD(loghead, log_node) log_queue = STAILQ_HEAD_INITIALIZER(log_queue);
+static struct	mtx dn_log_queue_mtx;
+static int	wait_for_log;
+static struct alq *dn_alq = NULL;
+static volatile uint32_t dn_exit_log_manager_thread = 0;
+static struct thread *dn_log_manager_thr = NULL;
+
+#define DN_LOG_FILE_MODE 0644
+#define DN_ALQ_BUFLEN 200000
+#define DN_MAX_LOG_MSG_LEN 60
+
+#define DN_LOG_DISABLE	0
+#define DN_LOG_ENABLE	1
+
 #ifdef SYSCTL_NODE
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
@@ -206,6 +284,12 @@ SYSCTL_LONG(_net_inet_ip_dummynet, OID_A
     CTLFLAG_RW, &pipe_slot_limit, 0, "Upper limit in slots for pipe queue.");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
     CTLFLAG_RW, &pipe_byte_limit, 0, "Upper limit in bytes for pipe queue.");
+SYSCTL_OID(_net_inet_ip_dummynet, OID_AUTO, log_enable, CTLTYPE_UINT|CTLFLAG_RW,
+    &dn_log_enable, 0, &dn_sysctl_log_enable_handler, "IU",
+    "switch dummynet data logging on/off");
+SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, logfile,
+    CTLTYPE_STRING|CTLFLAG_RW, &dn_logfile, sizeof(dn_logfile),
+    &dn_sysctl_logfile_name_handler, "A", "file to save dummynet log data to");
 #endif
 
 #ifdef DUMMYNET_DEBUG
@@ -451,6 +535,362 @@ heap_free(struct dn_heap *h)
  * --- end of heap management functions ---
  */
 
+static __inline void
+dn_process_log_node(struct log_node * log_node)
+{
+	char dn_log_msg[DN_MAX_LOG_MSG_LEN];
+
+	/* construct our log message */
+	snprintf( dn_log_msg,
+		DN_MAX_LOG_MSG_LEN,
+		"%d,0x%08x,%u.%06u,%u,%u,%d,0x%04x,%d,%d,%d,%d,%d\n",
+		log_node->direction,
+		log_node->hash,
+		(unsigned int)log_node->tval.tv_sec,
+		(unsigned int)log_node->tval.tv_usec,
+		ipver[log_node->ipver],
+		log_node->dropped,
+		log_node->fs_num,
+		log_node->fs_flags,
+		log_node->p_num,
+		log_node->p_len,
+		log_node->q_max_len,
+		log_node->q_len_pkts,
+		log_node->q_len_bytes
+	);
+
+	alq_writen(dn_alq, dn_log_msg, strlen(dn_log_msg), ALQ_WAITOK);
+}
+
+static void
+dn_log_manager_thread(void *arg)
+{
+	struct log_node *log_node, *log_node_temp;
+
+	/* loop until thread is signalled to exit */
+	while (!dn_exit_log_manager_thread) {
+		/*
+		 * sleep until we are signalled to wake because thread has
+		 * been told to exit or until 1 tick has passed
+		 */
+		tsleep(&wait_for_log, PWAIT, "logwait", 1);
+
+		/* Process logs until the queue is empty */
+		do {
+			log_node = NULL;
+
+			/* gain exclusive access to the queue */
+			mtx_lock(&dn_log_queue_mtx);
+
+			/* get the element at the head of the list */
+			if ((log_node = STAILQ_FIRST(&log_queue)) != NULL) {
+				/*
+				 * list wasn't empty, so let's remove the first
+				 * element from the list.
+				 * Note that STAILQ_REMOVE_HEAD doesn't delete
+				 * the log_node struct itself. It just
+				 * disentangles it from the list structure.
+				 * We have a copy of the node's ptr stored
+				 * in log_node.
+				 */
+				STAILQ_REMOVE_HEAD(&log_queue, nodes);
+			}
+			/*
+			 * We've finished making changes to the list. Unlock it
+			 * so the pfil hooks can continue queuing pkt_nodes
+			 */
+			mtx_unlock(&dn_log_queue_mtx);
+
+			/* if we successfully get a log_node from the list */
+			if (log_node != NULL) {
+				dn_process_log_node(log_node);
+				/*
+				 * free the memory that was
+				 * malloc'd in dn_log()
+				 */
+				free(log_node, M_DUMMYNET);
+			}
+
+		} while (log_node != NULL);
+	}
+
+	/* Flush all remaining log_nodes to the log file */
+
+	/* Lock the mutex so we gain exclusive access to the queue */
+	mtx_lock(&dn_log_queue_mtx);
+
+	STAILQ_FOREACH_SAFE(log_node, &log_queue, nodes, log_node_temp) {
+		dn_process_log_node(log_node);
+		STAILQ_REMOVE_HEAD(&log_queue, nodes);
+		free(log_node, M_DUMMYNET);
+	}
+
+	/* Reinit the list to mark it as empty and virgin */
+	STAILQ_INIT(&log_queue);
+
+	/* We've finished making changes to the list. Safe to unlock it. */
+	mtx_unlock(&dn_log_queue_mtx);
+
+	/* kthread_exit calls wakeup on our thread's struct pointer */
+	kthread_exit();
+}
+
+static int
+dn_sysctl_logfile_name_handler(SYSCTL_HANDLER_ARGS)
+{
+	struct alq *new_alq;
+
+	if (!req->newptr)
+		goto skip;
+
+	/* if old filename and new filename are different */
+	if (strncmp(dn_logfile, (char *)req->newptr, PATH_MAX)) {
+
+		int error = alq_open(	&new_alq,
+					req->newptr,
+					curthread->td_ucred,
+					DN_LOG_FILE_MODE,
+					DN_ALQ_BUFLEN,
+					0
+		);
+
+		/* bail if unable to create new alq */
+		if (error)
+			return 1;
+
+		/*
+		 * If disabled, dn_alq == NULL so we simply close
+		 * the alq as we've proved it can be opened.
+		 * If enabled, close the existing alq and switch the old for the new
+		 */
+		if (dn_alq == NULL)
+			alq_close(new_alq);
+		else {
+			alq_close(dn_alq);
+			dn_alq = new_alq;
+		}
+	}
+
+skip:
+	return sysctl_handle_string(oidp, arg1, arg2, req);
+}
+
+static int
+dn_manage_logging(uint8_t action)
+{
+	int ret, error = 0;
+	struct timeval tval;
+	struct sbuf *s = NULL;
+
+	/* init an autosizing sbuf that initially holds 200 chars */
+	if ((s = sbuf_new(NULL, NULL, 200, SBUF_AUTOEXTEND)) == NULL)
+		return -1;
+
+	if (action == DN_LOG_ENABLE) {
+
+		/* create our alq */
+		alq_open(	&dn_alq,
+				dn_logfile,
+				curthread->td_ucred,
+				DN_LOG_FILE_MODE,
+				DN_ALQ_BUFLEN,
+				0
+		);
+
+		STAILQ_INIT(&log_queue);
+
+		dn_exit_log_manager_thread = 0;
+
+		ret = kthread_add(	&dn_log_manager_thread,
+					NULL,
+					NULL,
+					&dn_log_manager_thr,
+					RFNOWAIT,
+					0,
+					"dn_log_manager_thr"
+		);
+
+		microtime(&tval);
+
+		sbuf_printf(s,
+			"enable_time_secs=%u\tenable_time_usecs=%06ld\thz=%u\tsysname=%s\tsysver=%u\n",
+			tval.tv_sec,
+			tval.tv_usec,
+			hz,
+			"FreeBSD",
+			__FreeBSD_version
+		);
+
+		sbuf_finish(s);
+		alq_writen(dn_alq, sbuf_data(s), sbuf_len(s), ALQ_WAITOK);
+	}
+	else if (action == DN_LOG_DISABLE && dn_log_manager_thr != NULL) {
+
+		/* tell the log manager thread that it should exit now */
+		dn_exit_log_manager_thread = 1;
+
+		/*
+		 * wake the pkt_manager thread so it realises that
+		 * dn_exit_log_manager_thread = 1 and exits gracefully
+		 */
+		wakeup(&wait_for_log);
+
+		/* wait for the pkt_manager thread to exit */
+		tsleep(dn_log_manager_thr, PWAIT, "thrwait", 0);
+
+		dn_log_manager_thr = NULL;
+
+		microtime(&tval);
+
+		sbuf_printf(s,
+			"disable_time_secs=%u\tdisable_time_usecs=%06ld",
+			tval.tv_sec,
+			tval.tv_usec
+		);
+
+		sbuf_printf(s, "\n");
+		sbuf_finish(s);
+		alq_writen(dn_alq, sbuf_data(s), sbuf_len(s), ALQ_WAITOK);
+		alq_close(dn_alq);
+		dn_alq = NULL;
+	}
+
+	sbuf_delete(s);
+
+	/*
+	 * XXX: Should be using ret to check if any functions fail
+	 * and set error appropriately
+	 */
+	return error;
+}
+
+static int
+dn_sysctl_log_enable_handler(SYSCTL_HANDLER_ARGS)
+{
+	if (!req->newptr)
+		goto skip;
+	
+	/* if the value passed in isn't DISABLE or ENABLE, return an error */
+	if (CAST_PTR_INT(req->newptr) != DN_LOG_DISABLE &&
+		CAST_PTR_INT(req->newptr) != DN_LOG_ENABLE)
+		return 1;
+	
+	/* if we are changing state (DISABLE to ENABLE or vice versa) */
+	if (CAST_PTR_INT(req->newptr) != dn_log_enable )
+		if (dn_manage_logging(CAST_PTR_INT(req->newptr))) {
+			dn_manage_logging(DN_LOG_DISABLE);
+			return 1;
+		}
+
+skip:
+	return sysctl_handle_int(oidp, arg1, arg2, req);
+}
+
+static uint32_t
+hash_pkt(struct mbuf *m, uint32_t offset)
+{
+	register uint32_t hash = 0;
+
+	while ((m != NULL) && (offset > m->m_len)) {
+		/*
+		 * the IP packet payload does not start in this mbuf
+		 * need to figure out which mbuf it starts in and what offset
+		 * into the mbuf's data region the payload starts at
+		 */
+		offset -= m->m_len;
+		m = m->m_next;
+	}
+
+	while (m != NULL) {
+		/* ensure there is data in the mbuf */
+		if ((m->m_len - offset) > 0) {
+			hash = hash32_buf(	m->m_data + offset,
+						m->m_len - offset,
+						hash
+			);
+                }
+
+		m = m->m_next;
+		offset = 0;
+        }
+
+	return hash;
+}
+
+static void
+dn_log(	struct dn_flow_set *fs,
+	struct dn_pipe *p,
+	struct dn_flow_queue *q,
+	struct mbuf *pkt,
+	u_int dropped,
+	int dir)
+{
+	struct log_node *log_node;
+
+	DUMMYNET_LOCK_ASSERT();
+
+	/* M_NOWAIT flag required here */
+	log_node = malloc(sizeof(struct log_node), M_DUMMYNET, M_NOWAIT);
+
+	if (log_node == NULL)
+		return;
+
+	/* set log_node struct members */
+	microtime(&(log_node->tval));
+	log_node->direction = dir;
+	log_node->dropped = dropped;
+	log_node->ipver = INP_IPV4;
+	log_node->fs_num = (dropped == DN_DROP_NOFS) ?
+				-1 : fs->fs_nr;
+	log_node->fs_flags = (dropped == DN_DROP_NOFS) ?
+				0 : fs->flags_fs;
+	log_node->q_max_len = (dropped == DN_DROP_NOFS) ?
+				-1 : fs->qsize;
+	log_node->p_num = (dropped == DN_DROP_NOFS ||
+				dropped == DN_DROP_NOP4Q) ?
+					-1 : p->pipe_nr;
+	log_node->p_len = (dropped == DN_DROP_NOFS ||
+				dropped == DN_DROP_NOP4Q) ?
+					-1 : p->len;
+	log_node->q_len_pkts = (dropped == DN_DROP_NOFS ||
+					dropped == DN_DROP_NOQ) ?
+						-1 : q->len;
+	log_node->q_len_bytes = (dropped == DN_DROP_NOFS ||
+					dropped == DN_DROP_NOQ) ?
+						-1 : q->len_bytes;
+
+	/*

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-user mailing list