ports/net/click anyone ?
Luigi Rizzo
rizzo at iet.unipi.it
Fri Jul 1 17:24:31 UTC 2011
[Cc to ports at freebsd,org, but please followup at net at freebsd.org]
anyone interested in taking over maintainership of ports/net/click ?
We have 1.5.0 in the tree, which is old and partly broken.
1.8.0 builds almost without problems, and the two patches i am
attaching give huge speedups for the userland version, especially
if used together with netmap. To get an idea of what you can get
on a single core i7-870 CPU with the stock version and with
these patches:
1.8.0 With patches
InfiniteSource -> Discard 515Kpps 18.56Mpps
InfiniteSource -> Queue -> Discard 500Kpps 13.41Mpps
pcap netmap
FromDevice->Queue->ToDevice 420Kpps 3.97 Mpps
--- Details ----
Click userland performance was never a priority given the high cost
(until now) of packet I/O. But once packet i/o has become quite fast,
it turns out that there are to other big offenders:
- the C++ memory allocator is quite expensive, and replacing it with
thread-local freelists (Packet objects and data buffers can be made
all with the same size) gives huge savings -- 100ns per packet or more
even on a fast machine;
- everytime an element wants a timestamp, it calls a syscall (gettimeofday()
or similar) which consumes another 400-800ns per call. There are many
elements (e.g. InfiniteSource, Counter, etc.) which timestamp packets.
Attached there are a couple of patches which address these problems:
- patch-pcap makes FromDevice and ToDevice use libpcap properly,
supporting I/O in bursts to amortize the syscall overhead;
- patch-more
+ introduces a NOTS option for InfiniteSource to remove timestamps.
This gives a 10x performance improvement in simple apps using InfiniteSource
+ replaces the allocator for Packet and data buffers with local freelists;
not thread safe, but this is easy to introduce. This gives another 1.5-2x
speed improvement after the 10x gained removing timestamps;
+ enables BURST operation in Discard, giving another 2x speed improvement
Using netmap instead of pcap is another big win, as you can see the forwarding
performance of a simple FromDevice->Queue->ToDevice chain goes up by 10x
You can find netmap at http://info.iet.unipi.it/~luigi/netmap/
cheers
luigi
v
-------------- next part --------------
diff -ur ../../work/click-1.8.0/elements/userlevel/fromdevice.cc ./elements/userlevel/fromdevice.cc
--- ../../work/click-1.8.0/elements/userlevel/fromdevice.cc 2010-02-28 21:03:33.000000000 +0100
+++ ./elements/userlevel/fromdevice.cc 2011-07-01 10:42:54.000000000 +0200
@@ -73,6 +73,7 @@
FromDevice::configure(Vector<String> &conf, ErrorHandler *errh)
{
bool promisc = false, outbound = false, sniffer = true;
+ _burst = 1;
_snaplen = 2046;
_headroom = Packet::default_headroom;
_headroom += (4 - (_headroom + 2) % 4) % 4; // default 4/2 alignment
@@ -89,9 +90,11 @@
"BPF_FILTER", 0, cpString, &bpf_filter,
"OUTBOUND", 0, cpBool, &outbound,
"HEADROOM", 0, cpUnsigned, &_headroom,
+ "BURST", 0, cpUnsigned, &_burst,
"ENCAP", cpkC, &has_encap, cpWord, &encap_type,
cpEnd) < 0)
return -1;
+ click_chatter("FromDevice(%s): burst: %d", _ifname.c_str(), _burst);
if (_snaplen > 8190 || _snaplen < 14)
return errh->error("SNAPLEN out of range");
if (_headroom > 8190)
@@ -361,6 +364,8 @@
p->set_packet_type_anno(Packet::MULTICAST);
}
+if (p->data() == 0 || p->length() < 16 || p->length() > 1536)
+ click_chatter("send pkt %p len %d", p->data(), p->length());
// set annotations
p->set_timestamp_anno(Timestamp::make_usec(pkthdr->ts.tv_sec, pkthdr->ts.tv_usec));
p->set_mac_header(p->data());
@@ -381,10 +386,11 @@
#if FROMDEVICE_PCAP
if (_capture == CAPTURE_PCAP) {
// Read and push() at most one packet.
- int r = pcap_dispatch(_pcap, 1, FromDevice_get_packet, (u_char *) this);
- if (r > 0)
- _pcap_task.reschedule();
- else if (r < 0 && ++_pcap_complaints < 5)
+ int r = pcap_dispatch(_pcap, _burst, FromDevice_get_packet, (u_char *) this);
+ if (r > 0) {
+ _count += r;
+ _pcap_task.fast_reschedule();
+ } else if (r < 0 && ++_pcap_complaints < 5)
ErrorHandler::default_handler()->error("%{element}: %s", this, pcap_geterr(_pcap));
}
#endif
@@ -421,12 +427,13 @@
FromDevice::run_task(Task *)
{
// Read and push() at most one packet.
- int r = pcap_dispatch(_pcap, 1, FromDevice_get_packet, (u_char *) this);
- if (r > 0)
- _pcap_task.fast_reschedule();
- else if (r < 0 && ++_pcap_complaints < 5)
+ int r = pcap_dispatch(_pcap, _burst, FromDevice_get_packet, (u_char *) this);
+ if (r > 0) {
+ _count += r;
+ _pcap_task.fast_reschedule(); // was fast_reschedule
+ } else if (r < 0 && ++_pcap_complaints < 5)
ErrorHandler::default_handler()->error("%{element}: %s", this, pcap_geterr(_pcap));
- return r > 0;
+ return r == (int)_burst;
}
#endif
diff -ur ../../work/click-1.8.0/elements/userlevel/fromdevice.hh ./elements/userlevel/fromdevice.hh
--- ../../work/click-1.8.0/elements/userlevel/fromdevice.hh 2010-02-28 21:06:16.000000000 +0100
+++ ./elements/userlevel/fromdevice.hh 2011-06-29 23:36:10.000000000 +0200
@@ -160,6 +160,7 @@
void selected(int fd);
#if FROMDEVICE_PCAP
+ inline pcap_t *pcap() const { return _pcap; }
bool run_task(Task *);
#endif
@@ -202,6 +203,8 @@
unsigned _headroom;
enum { CAPTURE_PCAP, CAPTURE_LINUX };
int _capture;
+
+ unsigned _burst;
#if FROMDEVICE_PCAP
String _bpf_filter;
#endif
diff -ur ../../work/click-1.8.0/elements/userlevel/todevice.cc ./elements/userlevel/todevice.cc
--- ../../work/click-1.8.0/elements/userlevel/todevice.cc 2009-09-03 22:36:30.000000000 +0200
+++ ./elements/userlevel/todevice.cc 2011-07-01 10:43:53.000000000 +0200
@@ -58,7 +58,7 @@
ToDevice::ToDevice()
: _task(this), _timer(&_task), _fd(-1), _my_fd(false),
_q(0),
- _pulls(0)
+ _pulls(0), _count(0)
{
}
@@ -70,13 +70,16 @@
ToDevice::configure(Vector<String> &conf, ErrorHandler *errh)
{
+ _burst = 1;
if (cp_va_kparse(conf, this, errh,
"DEVNAME", cpkP+cpkM, cpString, &_ifname,
+ "BURST", 0, cpUnsigned, &_burst,
"DEBUG", 0, cpBool, &_debug,
cpEnd) < 0)
return -1;
if (!_ifname)
return errh->error("interface not set");
+ click_chatter("ToDevice %s burst %d\n", _ifname.c_str(), _burst);
return 0;
}
@@ -85,6 +88,9 @@
{
_timer.initialize(this);
_fd = -1;
+#if TODEVICE_PCAP
+ _pcap = 0;
+#endif
#if TODEVICE_BSD_DEV_BPF
@@ -117,6 +123,9 @@
FromDevice *fdev = (FromDevice *)e->cast("FromDevice");
if (fdev && fdev->ifname() == _ifname && fdev->fd() >= 0) {
_fd = fdev->fd();
+# if TODEVICE_PCAP
+ _pcap = fdev->pcap();
+# endif
_my_fd = false;
}
}
@@ -170,14 +179,21 @@
bool
ToDevice::run_task(Task *)
{
- Packet *p = _q;
+ unsigned count;
+ Packet *p = _q; // previously saved packet
_q = 0;
- if (!p) {
- p = input(0).pull();
- _pulls++;
- }
+ for (count = 0; count < _burst; count++) {
+ if (!p) {
+ p = input(0).pull();
+ _pulls++;
+ if (!p) {
+ if (!_signal)
+ return false;
+ else
+ break;
+ }
+ }
- if (p) {
int retval;
const char *syscall;
@@ -187,6 +203,9 @@
#elif TODEVICE_SEND
retval = send(_fd, p->data(), p->length(), 0);
syscall = "send";
+#elif TODEVICE_INJECT
+ retval = ((uint32_t) pcap_inject(_pcap, p->data(), p->length()) == p->length() ? 0 : -1);
+ syscall = "pcap_inject";
#else
retval = 0;
#endif
@@ -194,6 +213,7 @@
if (retval >= 0) {
_backoff = 0;
checked_output_push(0, p);
+ _count++;
} else if (errno == ENOBUFS || errno == EAGAIN) {
assert(!_q);
@@ -215,15 +235,15 @@
return false;
} else {
- click_chatter("ToDevice(%s) %s: %s", _ifname.c_str(), syscall, strerror(errno));
+ click_chatter("ToDevice(%s) count %d pkt %p p %p len 0x%x errno %d %s: %s",
+ _ifname.c_str(), count, p, p->data(), p->length(), errno, syscall, strerror(errno));
checked_output_push(1, p);
}
- }
+ p = 0;
+ } // end for
- if (!p && !_signal)
- return false;
_task.fast_reschedule();
- return p != 0;
+ return true; // count == _burst;
}
void
@@ -234,7 +254,7 @@
}
-enum {H_DEBUG, H_SIGNAL, H_PULLS, H_Q};
+enum {H_DEBUG, H_SIGNAL, H_PULLS, H_Q, H_COUNT};
String
ToDevice::read_param(Element *e, void *thunk)
@@ -249,6 +269,8 @@
return String(td->_pulls);
case H_Q:
return String((bool) td->_q);
+ case H_COUNT:
+ return String(td->_count);
default:
return String();
}
diff -ur ../../work/click-1.8.0/elements/userlevel/todevice.hh ./elements/userlevel/todevice.hh
--- ../../work/click-1.8.0/elements/userlevel/todevice.hh 2009-09-03 22:36:30.000000000 +0200
+++ ./elements/userlevel/todevice.hh 2011-06-30 17:15:52.000000000 +0200
@@ -60,7 +60,10 @@
extern "C" {
# include <pcap.h>
}
-# if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
+# if defined(__FreeBSD__)
+# define TODEVICE_PCAP 1
+# define TODEVICE_INJECT 1
+# elif defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
# define TODEVICE_BSD_DEV_BPF 1
# define TODEVICE_WRITE 1
# elif defined(__sun)
@@ -100,6 +103,10 @@
String _ifname;
int _fd;
bool _my_fd;
+ unsigned _burst;
+#ifdef TODEVICE_PCAP
+ pcap_t *_pcap;
+#endif
NotifierSignal _signal;
@@ -108,6 +115,7 @@
bool _debug;
bool _backoff;
int _pulls;
+ unsigned _count;
};
CLICK_ENDDECLS
-------------- next part --------------
diff -ur ../../work/click-1.8.0/elements/standard/discard.cc ./elements/standard/discard.cc
--- ../../work/click-1.8.0/elements/standard/discard.cc 2009-12-18 18:44:25.000000000 +0100
+++ ./elements/standard/discard.cc 2011-06-29 23:36:10.000000000 +0200
@@ -23,7 +23,7 @@
CLICK_DECLS
Discard::Discard()
- : _task(this), _count(0), _active(true)
+ : _task(this), _count(0), _active(true), _burst(1)
{
}
@@ -36,8 +36,11 @@
{
if (cp_va_kparse(conf, this, errh,
"ACTIVE", 0, cpBool, &_active,
+ "BURST", 0, cpInteger, &_burst,
cpEnd) < 0)
return -1;
+ if (_burst < 1 || _burst > 1000)
+ return errh->error("BURST must be 1..1000");
if (!_active && input_is_push(0))
return errh->error("ACTIVE is meaningless in push context");
return 0;
@@ -63,12 +66,17 @@
bool
Discard::run_task(Task *)
{
- Packet *p = input(0).pull();
- if (p) {
- _count++;
- p->kill();
- } else if (!_signal || !_active)
- return false;
+ Packet *p = 0;
+ int i;
+ for (i = _burst; i > 0; i--) {
+ p = input(0).pull();
+ if (p) {
+ _count++;
+ p->kill();
+ } else if (!_signal || !_active)
+ return false;
+ if (!p) break;
+ }
_task.fast_reschedule();
return p != 0;
}
diff -ur ../../work/click-1.8.0/elements/standard/discard.hh ./elements/standard/discard.hh
--- ../../work/click-1.8.0/elements/standard/discard.hh 2009-12-18 18:44:25.000000000 +0100
+++ ./elements/standard/discard.hh 2011-06-29 23:36:10.000000000 +0200
@@ -76,6 +76,8 @@
bool _active;
+ int _burst;
+
enum { h_reset_counts = 0, h_active = 1 };
static int write_handler(const String &, Element *, void *, ErrorHandler *);
diff -ur ../../work/click-1.8.0/elements/standard/infinitesource.cc ./elements/standard/infinitesource.cc
--- ../../work/click-1.8.0/elements/standard/infinitesource.cc 2009-12-02 19:47:56.000000000 +0100
+++ ./elements/standard/infinitesource.cc 2011-06-29 23:36:10.000000000 +0200
@@ -28,7 +28,7 @@
CLICK_DECLS
InfiniteSource::InfiniteSource()
- : _packet(0), _task(this)
+ : _packet(0), _task(this), _nots(false)
{
}
@@ -63,6 +63,7 @@
"BURST", cpkP, cpInteger, &burstsize,
"ACTIVE", cpkP, cpBool, &active,
"LENGTH", 0, cpInteger, &datasize,
+ "NOTS", 0, cpBool, &_nots,
"DATASIZE", 0, cpInteger, &datasize, // deprecated
"STOP", 0, cpBool, &stop,
cpEnd) < 0)
@@ -110,7 +111,8 @@
n = (_count > _limit ? 0 : _limit - _count);
for (int i = 0; i < n; i++) {
Packet *p = _packet->clone();
- p->timestamp_anno().assign_now();
+ if (!_nots)
+ p->timestamp_anno().assign_now();
output(0).push(p);
}
_count += n;
@@ -137,7 +139,8 @@
}
_count++;
Packet *p = _packet->clone();
- p->timestamp_anno().assign_now();
+ if (!_nots)
+ p->timestamp_anno().assign_now();
return p;
}
diff -ur ../../work/click-1.8.0/elements/standard/infinitesource.hh ./elements/standard/infinitesource.hh
--- ../../work/click-1.8.0/elements/standard/infinitesource.hh 2009-09-03 22:36:30.000000000 +0200
+++ ./elements/standard/infinitesource.hh 2011-06-29 23:36:10.000000000 +0200
@@ -119,6 +119,7 @@
int _datasize;
bool _active;
bool _stop;
+ bool _nots;
Task _task;
String _data;
NotifierSignal _nonfull_signal;
diff -ur ../../work/click-1.8.0/include/click/packet.hh ./include/click/packet.hh
--- ../../work/click-1.8.0/include/click/packet.hh 2010-02-28 18:22:28.000000000 +0100
+++ ./include/click/packet.hh 2011-06-30 00:37:00.000000000 +0200
@@ -669,6 +669,30 @@
# endif
#endif
+ static void *_p_freelist;
+ static int _p_freecount, _p_limit;
+ inline void *operator new(size_t len) {
+ if (!_p_freelist) {
+ //fprintf(stderr, "must allocate object\n");
+ return malloc(len);
+ } else {
+ Packet *p = (Packet *)_p_freelist;
+ _p_freelist = *(void **)p;
+ _p_freecount--;
+ return p;
+ }
+ }
+
+ inline void operator delete(void *p) {
+ if (_p_freecount >= _p_limit) {
+ free(p);
+ } else {
+ *(void **)p = _p_freelist;
+ _p_freelist = p;
+ _p_freecount++;
+ }
+ }
+
inline Packet();
Packet(const Packet &);
~Packet();
diff -ur ../../work/click-1.8.0/lib/packet.cc ./lib/packet.cc
--- ../../work/click-1.8.0/lib/packet.cc 2010-02-28 18:22:28.000000000 +0100
+++ ./lib/packet.cc 2011-06-30 16:45:05.000000000 +0200
@@ -178,6 +178,15 @@
* Avoid writing buggy code like this! Use WritablePacket selectively, and
* try to avoid calling WritablePacket::clone() when possible. */
+void *Packet::_p_freelist = 0;
+int Packet::_p_freecount = 0;
+int Packet::_p_limit = 10000;
+
+static void *_pb_freelist = 0;
+static int _pb_freecount = 0;
+static int _pb_limit = 10000;
+static int _pb_alloc = 0;
+
Packet::~Packet()
{
// This is a convenient place to put static assertions.
@@ -197,13 +206,21 @@
#if CLICK_LINUXMODULE
panic("Packet destructor");
#else
- if (_data_packet)
+ if (_data_packet) // XXX cloned
_data_packet->kill();
# if CLICK_USERLEVEL
else if (_head && _destructor)
_destructor(_head, _end - _head);
- else
- delete[] _head;
+ else {
+ //fprintf(stderr, "free buffer %p\n", _head);
+ if (_pb_freecount > _pb_limit) {
+ free(_head);
+ } else {
+ *(void **)_head = _pb_freelist;
+ _pb_freelist = _head;
+ _pb_freecount++;
+ }
+ }
# elif CLICK_BSDMODULE
if (_m)
m_freem(_m);
@@ -217,6 +234,7 @@
inline WritablePacket *
Packet::make(int, int, int)
{
+
return static_cast<WritablePacket *>(new Packet(6, 6, 6));
}
@@ -229,7 +247,19 @@
n = min_buffer_length;
}
#if CLICK_USERLEVEL
- unsigned char *d = new unsigned char[n];
+ unsigned char *d;
+ if (n > 2047) {
+ // fprintf(stderr, "alloc size %d count %d\n", n, _pb_alloc++);
+ d = (unsigned char *)malloc(n);
+ } else if (!_pb_freelist) {
+ // if (_pb_alloc % 10000 == 0)
+ // fprintf(stderr, "cur alloc size %d count %d\n", n, _pb_alloc++);
+ d = (unsigned char *)malloc(2048);
+ } else {
+ d = (unsigned char *)_pb_freelist;
+ _pb_freelist = *(char **)_pb_freelist;
+ _pb_freecount--;
+ }
if (!d)
return false;
_head = d;
@@ -494,8 +524,14 @@
# if CLICK_USERLEVEL
else if (_destructor)
_destructor(old_head, old_end - old_head);
- else
- delete[] old_head;
+ else {
+ if (_pb_freecount > _pb_limit) {
+ free(old_head);
+ } else {
+ *(unsigned char **)_pb_freecount = old_head;
+ _pb_freecount++;
+ }
+ }
_destructor = 0;
# elif CLICK_BSDMODULE
else
More information about the freebsd-net
mailing list