svn commit: r201776 - user/luigi/ipfw3-head/sys/netinet/ipfw

Fri Jan 8 09:31:20 UTC 2010

Author: luigi
Date: Fri Jan  8 09:31:19 2010
New Revision: 201776
URL: http://svn.freebsd.org/changeset/base/201776

Log:
  snapshot

Modified:
  user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
  user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c
==============================================================================

--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c	Fri Jan  8 09:31:18 2010	(r201775)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c	Fri Jan  8 09:31:19 2010	(r201776)
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
  * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa
  * All rights reserved
  *
@@ -107,14 +108,6 @@ static unsigned long	io_pkt_drop;
 static struct dn_heap *system_heap;
 
 /*
- * Three heaps contain queues and pipes that the scheduler handles:
- *
- * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
- *
- * wfq_ready_heap contains the pipes associated with WF2Q flows
- *
- * extract_heap contains pipes associated with delay lines.
- *
  * The key for the heap is used for two different values:
  *
  * 1. timer ticks- max 10K/second, so 32 bits are enough;
@@ -130,18 +123,14 @@ static struct dn_heap *system_heap;
  * MAX64 returns the largest of two key values.
  * MY_M is used as a shift count when doing fixed point arithmetic
  * (a better name would be useful...).
+ * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the
+ * virtual time wraps every 15 days.
  */
 #define MAX64(x,y)  (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x)
 #define MY_M    16 /* shift for fixed point arithmetic */
   
-/*
- * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the
- * virtual time wraps every 15 days.
- */
-
 MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
 
-struct dn_heap ready_heap, extract_heap, wfq_ready_heap ;
 struct new_pipe_head	pipehash[DN_HASHSIZE];	/* all pipes */
 struct new_fs_head	flowsethash[DN_HASHSIZE];	/* all flowsets */
 
@@ -158,10 +147,6 @@ SYSCTL_INT(_net_inet_ip_dummynet, OID_AU
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time,
     CTLFLAG_RD, &curr_time, 0, "Current tick");
 #endif
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
-    CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
-    CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, searches,
     CTLFLAG_RD, &searches, 0, "Number of queue searches");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps,
@@ -234,14 +219,12 @@ static void	dummynet_send(struct mbuf *)
 
 /*
  * Packets processed by dummynet have an mbuf tag associated with
- * them that carries their dummynet state.  This is used within
- * the dummynet code as well as outside when checking for special
- * processing requirements.
- * Note that the first part is the reinject info and is common to
- * other forms of packet reinjection.
+ * them that carries their dummynet state.
+ * Outside dummynet, only the 'rule' field is relevant, and it must
+ * be at the beginning of the structure.
  */
 struct dn_pkt_tag {
-	struct ipfw_rule_ref rule;	/* matching rule		*/
+	struct ipfw_rule_ref rule;	/* matching rule	*/
 
 	/* second part, dummynet specific */
 	int dn_dir;		/* action when packet comes out.*/
@@ -255,6 +238,7 @@ struct dn_pkt_tag {
  * Return the mbuf tag holding the dummynet state.  As an optimization
  * this is assumed to be the first tag on the list.  If this turns out
  * wrong we'll need to search the list.
+ * XXX OK
  */
 static struct dn_pkt_tag *
 dn_tag_get(struct mbuf *m)
@@ -271,43 +255,44 @@ dn_tag_get(struct mbuf *m)
  * It is called when we have some packet from delay line to send.
  * If there are leftover packets, this delay line is reinserted into extract
  * heap
+ * XXX OK
  */
-static
-struct mbuf *
+static struct mbuf *
 transmit_event(struct delay_line *dline, dn_key l_curr_time)
 {
-    struct mbuf *m;
-    struct dn_pkt_tag *pkt;
+	struct mbuf *m;
+	struct dn_pkt_tag *pkt;
 
-    struct mbuf *head = NULL, *tail = NULL;
-    /* XXX scheduler lock */
-    while ((m = dline->head) != NULL) {
-        pkt = dn_tag_get(m);
-        if (!DN_KEY_LEQ(pkt->output_time, l_curr_time))
-            break;
-        dline->head = m->m_nextpkt;
-        if (tail != NULL)
-            tail->m_nextpkt = m;
-        else
-            head = m;
-        tail = m;
-    }
+	struct mbuf *head = NULL, *tail = NULL;
+	/* XXX scheduler lock */
+	while ((m = dline->head) != NULL) {
+		pkt = dn_tag_get(m);
+		if (!DN_KEY_LEQ(pkt->output_time, l_curr_time))
+			break;
+		dline->head = m->m_nextpkt;
+		if (tail != NULL)
+			tail->m_nextpkt = m;
+		else
+			head = m;
+		tail = m;
+	}
 
-    if (tail != NULL)
-        tail->m_nextpkt = NULL;
+	if (tail != NULL)
+		tail->m_nextpkt = NULL;
 
-    /* If there are leftover packets, put into the heap for next event. */
-    if ((m = dline->head) != NULL) {
-        pkt = dn_tag_get(m);
-        //DN_HEAP_LOCK();
-        heap_insert(system_heap, pkt->output_time, dline);
-        //DN_HEAP_UNLOCK();
-    }
-    /* XXX scheduler unlock */
-    return head;
+	/* If there are leftover packets, put into the heap for next event. */
+	if ((m = dline->head) != NULL) {
+		pkt = dn_tag_get(m);
+		//DN_HEAP_LOCK();
+		heap_insert(system_heap, pkt->output_time, dline);
+		//DN_HEAP_UNLOCK();
+	}
+	/* XXX scheduler unlock */
+	return head;
 }
 
 #define div64(a, b)	((int64_t)(a) / (int64_t)(b))
+
 /*
  * Compute how many ticks we have to wait before being able to send
  * a packet. This is computed as the "wire time" for the packet
@@ -350,27 +335,230 @@ compute_extra_bits(struct mbuf *pkt, str
 	return extra_bits;
 }
 
-/* Insert packet pkt into delay line of si. */
+/* Insert packet pkt into delay line */
 static void
-move_pkt(struct mbuf *pkt, struct new_pipe *p, struct new_sch_inst *si,
+move_pkt(struct mbuf *pkt, struct new_pipe *p, struct delay_line *d,
           dn_key l_curr_time)
 {
-    struct dn_pkt_tag *dt = dn_tag_get(pkt);
-    struct delay_line *d = si->dline;
- 
-    dt->output_time = l_curr_time + p->delay ;
+	struct dn_pkt_tag *dt = dn_tag_get(pkt);
+
+	dt->output_time = l_curr_time + p->delay ;
+
+	if (d->head == NULL)
+		d->head = pkt;
+	else
+		d->tail->m_nextpkt = pkt;
+	d->tail = pkt;
+	d->tail->m_nextpkt = NULL;
+}
+
+/* Apply 'mask' to the flow id 'id' in place (IPv4 or IPv6) and return 'id' */
+static struct ipfw_flow_id *
+do_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
+{
+	int is_v6 = IS_IP6_FLOW_ID(id);
+
+	id->dst_port &= mask->dst_port;
+	id->src_port &= mask->src_port;
+	id->proto &= mask->proto;
+	id->flags = 0; /* we don't care about this one */
+	if (is_v6) {
+		APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
+		APPLY_MASK(&id->src_ip6, &mask->src_ip6);
+		id->flow_id6 &= mask->flow_id6;
+	} else {
+		id->dst_ip &= mask->dst_ip;
+		id->src_ip &= mask->src_ip;
+	}
+
+	return id;
+}
+
+/*
+ * Calculate the hash of a flow id.
+ * XXX we may want a better hash function
+ */
+static int
+do_hash(struct ipfw_flow_id *id)
+{
+    int i;
+    int is_v6 = IS_IP6_FLOW_ID(id);
+        
+    if (is_v6) {
+        i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^
+            ((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^
+            ((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^
+            ((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^
+       
+            ((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^
+            ((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^
+            ((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^
+            ((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^
+            
+            ((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^
+            ((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^
+            ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^
+            ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^
+   
+            ((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^
+            ((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^
+            ((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^
+            ((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^
+        
+            (id->dst_port << 1) ^ (id->src_port) ^
+            (id->proto ) ^
+            (id->flow_id6);
+    } else {
+        i = ( (id->dst_ip) & 0xffff ) ^
+            ( (id->dst_ip >> 15) & 0xffff ) ^
+            ( (id->src_ip << 1) & 0xffff ) ^
+            ( (id->src_ip >> 16 ) & 0xffff ) ^
+            (id->dst_port << 1) ^ (id->src_port) ^
+            (id->proto );
+    }
+    return i;
+}
+
+/*
+ * Returns 0 if the two (masked) flow ids match,
+ * returns 1 otherwise.
+ */
+static int
+mask_are_equals (struct ipfw_flow_id *id1, struct ipfw_flow_id *id2)
+{
+	int is_v6 = IS_IP6_FLOW_ID(id1);
+	if (is_v6 != IS_IP6_FLOW_ID(id2))
+		return 1; /* a ipv4 and a ipv6 flow */
+	    
+	if (!is_v6 && id1->dst_ip == id2->dst_ip &&
+	    id1->src_ip == id2->src_ip &&
+	    id1->dst_port == id2->dst_port &&
+	    id1->src_port == id2->src_port &&
+	    id1->proto == id2->proto &&
+	    id1->flags == id2->flags)
+		return 0;
+	    
+	if (is_v6 &&
+	    IN6_ARE_ADDR_EQUAL(&id1->dst_ip6,&id2->dst_ip6) &&
+	    IN6_ARE_ADDR_EQUAL(&id1->src_ip6,&id2->src_ip6) &&
+	    id1->dst_port == id2->dst_port &&
+	    id1->src_port == id2->src_port &&
+	    id1->proto == id2->proto &&
+	    id1->flags == id2->flags &&
+	    id1->flow_id6 == id2->flow_id6)
+		return 0;
+     
+	/* Masks differ */
+	return 1;
+}
+
+/*
+ * Create a new scheduler instance for the scheduler 'sch_t'.
+ * Allocate memory for common and scheduler private data.
+ * XXX put the delay line within the instance ?
+ * XXX why do we need separate delay lines ?
+ */
+static struct new_sch_inst *
+create_scheduler_instance(struct new_sch *sch_t, dn_key l_curr_time)
+{
+	struct new_sch_inst *si;
+	int ret;
+	const char *msg = "malloc failure";
+	int l = sizeof(*si) + sch_t->fp->scheduler_i_size;
+
+	si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
+
+	if (si == NULL)
+		goto error;
+
+	si->dline = malloc(sizeof(struct delay_line), M_DUMMYNET, M_NOWAIT);
+	if (si->dline == NULL)
+		goto error;
+	si->dline->si = si;
+	set_oid(&si->dline->id, DN_DELAY_LINE, 0, sizeof(struct delay_line));
+	si->dline->head = si->dline->tail = NULL;
+
+	set_oid(&si->oid, DN_SCH_I, 0, l);
+
+	si->sched_nr = sch_t->sched_nr;
+	si->ptr_sched = sch_t;
+
+	/* XXX do we make assumption on this starting with dn_id ? */
+	ret = sch_t->fp->new_sched((void *)(si + 1));
+	if (ret) {
+		msg = "new_sched error";
+		goto error;
+	}
 
-    if (d->head == NULL)
-        d->head = pkt;
-    else
-        d->tail->m_nextpkt = pkt;
-    d->tail = pkt;
-    d->tail->m_nextpkt = NULL;
+	/* Initialize scheduler instance queues list */
+	SLIST_INIT(&si->ql_list);
+	     
+	si->oid.subtype = ((struct dn_id*)(si + 1))->subtype;
+	((struct dn_id*)(si + 1))->type = DN_SCH_I;
+	si->idle_time = l_curr_time;
+	return si;
+
+error:
+	printf("%s: %s\n", __FUNCTION__, msg);
+	if (si)
+		free(si, M_DUMMYNET);
+        return NULL;
 }
 
-struct new_sch_inst *
+static struct new_sch_inst *
 find_scheduler(struct new_sch *sch_t, struct new_fs *fs,
-                struct ipfw_flow_id *id, dn_key l_curr_time);
+                struct ipfw_flow_id *id, dn_key l_curr_time)
+{
+    struct new_sch_inst *prev, *s; /* returning scheduler instance */
+    struct ipfw_flow_id *id_t;
+    int i = 0;
+
+    id_t = malloc(sizeof(struct ipfw_flow_id), M_DUMMYNET, M_NOWAIT);
+    if (id_t == NULL) {
+        printf("dummynet: no memory for flowid\n");
+        return NULL;
+    }
+    /* XXX check return value */
+    *id_t = *id; /* The original id isn't modified */
+    do_mask(&sch_t->sched_mask, id_t);
+    if ( !(sch_t->flags & DN_SCH_HAVE_MASK) ) {
+        s = sch_t->sch_i[0];
+    } else {
+        /* the id has already been masked above; now hash it */
+        i = do_hash(id_t);
+        i = i % sch_t->sch_i_size;
+        /* finally, scan the current hash bucket for a match */
+        searches++ ;
+        for (prev=NULL, s = sch_t->sch_i[i] ; s ; ) {
+            search_steps++;
+            if (!mask_are_equals(id_t, &s->id))
+                break; /* found */
+            prev = s ;
+            s = s->next ;
+        }
+        if (s && prev != NULL) { /* found and not in front */
+            prev->next = s->next ;
+            s->next = sch_t->sch_i[i] ;
+            sch_t->sch_i[i] = s ;
+        }
+    }
+   
+    if (s == NULL) { /* no match, need to allocate a new entry */
+        s = create_scheduler_instance(sch_t, l_curr_time);
+        if (s == NULL)
+            return NULL;
+        /* Link the new instance at the head of hash bucket i */
+        s->next = sch_t->sch_i[i];
+        sch_t->sch_i[i] = s;
+        sch_t->sch_i_elements++;
+        if (s != NULL) {
+            s->id = *id_t;
+            s->hash_slot = i;
+        }
+    }
+    return s;
+}
+
 
 /*
  * The timer handler for dummynet. Time is computed in ticks, but
@@ -871,30 +1059,6 @@ ipdn_locate_pipe(int pipe_nr)
 	return (NULL);
 }
 
-struct ipfw_flow_id *
-do_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id);
-/* Do masking depending of flow id */
-struct ipfw_flow_id *
-do_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
-{
-    int is_v6 = IS_IP6_FLOW_ID(id);
-
-    id->dst_port &= mask->dst_port;
-    id->src_port &= mask->src_port;
-    id->proto &= mask->proto;
-    id->flags = 0; /* we don't care about this one */
-    if (is_v6) {
-        APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
-        APPLY_MASK(&id->src_ip6, &mask->src_ip6);
-        id->flow_id6 &= mask->flow_id6;
-    }
-    else {
-        id->dst_ip &= mask->dst_ip;
-        id->src_ip &= mask->src_ip;
-    }
-        
-    return id;
-}
 
 /*
  * dummynet hook for packets. Below 'pipe' is a pipe or a queue
@@ -1035,7 +1199,7 @@ dummynet_io(struct mbuf **m0, int dir, s
                             compute_extra_bits(tosend, pipe) * hz : 0;
             sch_inst->numbytes -= len_scaled;
             /* Move packet in the delay line XXX three parameters? */
-            move_pkt(tosend, pipe, sch_inst, l_curr_time);
+            move_pkt(tosend, pipe, sch_inst->dline, l_curr_time);
             if (sch_inst->numbytes < 0) {
                 /*
                  * Credit became negative, so insert the instance in the

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c	Fri Jan  8 09:31:18 2010	(r201775)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c	Fri Jan  8 09:31:19 2010	(r201776)
@@ -183,10 +183,6 @@ dummynet_flush(void)
 	int i;
 
 	DUMMYNET_LOCK();
-	/* Free heaps so we don't have unwanted events. */
-	heap_free(&ready_heap);
-	heap_free(&wfq_ready_heap);
-	heap_free(&extract_heap);
 
 	/*
 	 * Now purge all queued pkts and delete all pipes.
@@ -824,9 +820,6 @@ ip_dn_init(void)
 		SLIST_INIT(&pipehash[i]);
 		SLIST_INIT(&flowsethash[i]);
 	}
-	bzero(&ready_heap, sizeof(ready_heap));
-	bzero(&wfq_ready_heap, sizeof(wfq_ready_heap));
-	bzero(&extract_heap, sizeof(extract_heap));
 
 	ip_dn_ctl_ptr = ip_dn_ctl;
 	ip_dn_io_ptr = dummynet_io;