PERFORCE change 126711 for review
Kip Macy
kmacy at FreeBSD.org
Sat Sep 22 23:25:38 PDT 2007
http://perforce.freebsd.org/chv.cgi?CH=126711
Change 126711 by kmacy at kmacy_home:ethng on 2007/09/23 06:24:49
- reinstate lazy refcnt assignment, in the common case of ref_cnt == 1 this
saves us both a cache miss in uma_find_refcnt and a cache miss mb_free_ext
when decrementing the refcnt
- avoid touching the mbuf if MB_NOTAGS is passed - this allows us to avoid
unnecessary cache misses when freeing an mbuf that is no longer hot in the
cache
- add M_NOFREE flag to skip freeing an mbuf - this allows us to embed an mbuf
header in the cluster itself, giving all zones the packet zone like benefits
without the complexity and restrictions that it adds
- remove access to the packet zone from mbuf.h
Affected files ...
.. //depot/projects/ethng/src/sys/kern/kern_mbuf.c#6 edit
.. //depot/projects/ethng/src/sys/kern/uipc_mbuf.c#3 edit
.. //depot/projects/ethng/src/sys/sys/mbuf.h#6 edit
Differences ...
==== //depot/projects/ethng/src/sys/kern/kern_mbuf.c#6 (text+ko) ====
@@ -349,9 +349,10 @@
mb_dtor_mbuf(void *mem, int size, void *arg)
{
struct mbuf *m;
-
+ unsigned long flags = (unsigned long)arg;
+
m = (struct mbuf *)mem;
- if ((m->m_flags & M_PKTHDR) != 0)
+ if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
m_tag_delete_chain(m, NULL);
KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
#ifdef INVARIANTS
@@ -378,7 +379,6 @@
KASSERT(m->m_ext.ext_args == NULL, ("%s: ext_args != NULL", __func__));
KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
- KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
#ifdef INVARIANTS
trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
@@ -404,7 +404,6 @@
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
struct mbuf *m;
- u_int *refcnt;
int type;
uma_zone_t zone;
@@ -435,10 +434,7 @@
break;
}
-
if (arg != NULL) {
- refcnt = uma_find_refcnt(zone, mem);
- *refcnt = 1;
m = (struct mbuf *)arg;
m->m_ext.ext_buf = (caddr_t)mem;
m->m_data = m->m_ext.ext_buf;
@@ -447,7 +443,7 @@
m->m_ext.ext_args = NULL;
m->m_ext.ext_size = size;
m->m_ext.ext_type = type;
- m->m_ext.ref_cnt = refcnt;
+ m->m_ext.ref_cnt = NULL; /* lazy assignment */
}
return (0);
==== //depot/projects/ethng/src/sys/kern/uipc_mbuf.c#3 (text+ko) ====
@@ -220,18 +220,40 @@
void
mb_free_ext(struct mbuf *m)
{
+ int skipmbuf;
+ int dofree;
+ u_int cnt;
+
KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
- KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
+
+ /* Account for lazy ref count assign. */
+ if (m->m_ext.ref_cnt == NULL)
+ dofree = 1;
+ else
+ dofree = 0;
+
+ /*
+ * check if the header is embedded in the cluster
+ */
+ skipmbuf = (m->m_flags & M_NOFREE);
+
+ /*
+ * This is tricky. We need to make sure to decrement the
+ * refcount in a safe way but to also clean up if we're the
+ * last reference. This method seems to do it without race.
+ */
+ while (dofree == 0) {
+ cnt = *(m->m_ext.ref_cnt);
+ if (atomic_cmpset_int(m->m_ext.ref_cnt, cnt, cnt - 1)) {
+ if (cnt == 1)
+ dofree = 1;
+ break;
+ }
+ }
/* Free attached storage if this mbuf is the only reference to it. */
- if (*(m->m_ext.ref_cnt) == 1 ||
- atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
+ if (dofree) {
switch (m->m_ext.ext_type) {
- case EXT_PACKET: /* The packet zone is special. */
- if (*(m->m_ext.ref_cnt) == 0)
- *(m->m_ext.ref_cnt) = 1;
- uma_zfree(zone_pack, m);
- return; /* Job done. */
case EXT_CLUSTER:
uma_zfree(zone_clust, m->m_ext.ext_buf);
break;
@@ -263,6 +285,10 @@
("%s: unknown ext_type", __func__));
}
}
+
+ if (skipmbuf)
+ return;
+
/*
* Free this mbuf back to the mbuf zone with all m_ext
* information purged.
@@ -285,13 +311,9 @@
mb_dupcl(struct mbuf *n, struct mbuf *m)
{
KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
- KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
- if (*(m->m_ext.ref_cnt) == 1)
- *(m->m_ext.ref_cnt) += 1;
- else
- atomic_add_int(m->m_ext.ref_cnt, 1);
+ MEXT_ADD_REF(m);
n->m_ext.ext_buf = m->m_ext.ext_buf;
n->m_ext.ext_free = m->m_ext.ext_free;
n->m_ext.ext_args = m->m_ext.ext_args;
==== //depot/projects/ethng/src/sys/sys/mbuf.h#6 (text+ko) ====
@@ -44,6 +44,48 @@
#endif
#endif
+/*-
+ * mbuf external reference count management macros.
+ *
+ * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing
+ * the external buffer ext_buf.
+ *
+ * MEXT_REM_REF(m): remove reference to m_ext object.
+ *
+ * MEXT_ADD_REF(m): add reference to m_ext object already
+ * referred to by (m). XXX Note that it is VERY important that you
+ * always set the second mbuf's m_ext.ref_cnt to point to the first
+ * one's (i.e., n->m_ext.ref_cnt = m->m_ext.ref_cnt) AFTER you run
+ * MEXT_ADD_REF(m). This is because m might have a lazy initialized
+ * ref_cnt (NULL) before this is run and it will only be looked up
+ * from here. We should make MEXT_ADD_REF() always take two mbufs
+ * as arguments so that it can take care of this itself.
+ */
+#define MEXT_IS_REF(m) (((m)->m_ext.ref_cnt != NULL) \
+ && (*((m)->m_ext.ref_cnt) > 1))
+
+#define MEXT_REM_REF(m) do { \
+ KASSERT((m)->m_ext.ref_cnt != NULL, ("m_ext refcnt lazy NULL")); \
+ KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \
+ atomic_subtract_int((m)->m_ext.ref_cnt, 1); \
+} while(0)
+
+#define MEXT_ADD_REF(m) do { \
+ if ((m)->m_ext.ref_cnt == NULL) { \
+ KASSERT((m)->m_ext.ext_type == EXT_CLUSTER || \
+ (m)->m_ext.ext_type == EXT_PACKET || \
+ (m)->m_ext.ext_type == EXT_JUMBOP || \
+ (m)->m_ext.ext_type == EXT_JUMBO9 || \
+ (m)->m_ext.ext_type == EXT_JUMBO16, \
+ ("Unexpected mbuf type has lazy refcnt %d", \
+ (m)->m_ext.ext_type)); \
+ (m)->m_ext.ref_cnt = (u_int *)uma_find_refcnt( \
+ zone_clust, (m)->m_ext.ext_buf); \
+ *((m)->m_ext.ref_cnt) = 2; \
+ } else \
+ atomic_add_int((m)->m_ext.ref_cnt, 1); \
+} while (0)
+
/*
* Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead.
* An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in
@@ -130,13 +172,13 @@
* Description of external storage mapped into mbuf; valid only if M_EXT is
* set.
*/
-struct m_ext {
+struct m_ext_ {
caddr_t ext_buf; /* start of buffer */
void (*ext_free) /* free routine if not the usual */
(void *, void *);
void *ext_args; /* optional argument pointer */
+ volatile u_int *ref_cnt; /* pointer to ref count info */
u_int ext_size; /* size of buffer, for ext_free */
- volatile u_int *ref_cnt; /* pointer to ref count info */
int ext_type; /* type of external storage */
};
@@ -150,7 +192,7 @@
struct {
struct pkthdr MH_pkthdr; /* M_PKTHDR set */
union {
- struct m_ext MH_ext; /* M_EXT set */
+ struct m_ext_ MH_ext; /* M_EXT set */
char MH_databuf[MHLEN];
} MH_dat;
} MH;
@@ -196,6 +238,7 @@
#define M_LASTFRAG 0x2000 /* packet is last fragment */
#define M_VLANTAG 0x10000 /* ether_vtag is valid */
#define M_PROMISC 0x20000 /* packet was not for us */
+#define M_NOFREE 0x40000 /* do not free the mbuf - its embedded in a cluster */
/*
* External buffer types: identify ext_buf type.
@@ -207,6 +250,8 @@
#define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */
#define EXT_PACKET 6 /* mbuf+cluster from packet zone */
#define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */
+#define EXT_IOVEC 8
+#define EXT_CLIOVEC 9
#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */
#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */
#define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */
@@ -251,6 +296,8 @@
#define MT_DATA 1 /* dynamic (data) allocation */
#define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */
#define MT_SONAME 8 /* socket name */
+#define MT_IOVEC 9
+#define MT_CLIOVEC 10
#define MT_CONTROL 14 /* extra-data protocol message */
#define MT_OOBDATA 15 /* expedited data */
#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */
@@ -258,6 +305,9 @@
#define MT_NOINIT 255 /* Not a type but a flag to allocate
a non-initialized mbuf */
+#define MB_NOTAGS 0x1UL
+
+
/*
* General mbuf allocator statistics structure.
*
@@ -339,7 +389,6 @@
extern uma_zone_t zone_mbuf;
extern uma_zone_t zone_clust;
-extern uma_zone_t zone_pack;
extern uma_zone_t zone_jumbop;
extern uma_zone_t zone_jumbo9;
extern uma_zone_t zone_jumbo16;
@@ -458,11 +507,7 @@
static __inline struct mbuf *
m_getcl(int how, short type, int flags)
{
- struct mb_args args;
-
- args.flags = flags;
- args.type = type;
- return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how)));
+ return ((struct mbuf *)(m_getjcl(how, type, flags, MCLBYTES)));
}
/*
@@ -501,12 +546,18 @@
if (m->m_flags & M_EXT)
mb_free_ext(m);
- else
+ else if ((m->m_flags & M_NOFREE) == 0)
uma_zfree(zone_mbuf, m);
return (n);
}
static __inline void
+m_free_fast(struct mbuf *m)
+{
+ uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS);
+}
+
+static __inline void
m_clget(struct mbuf *m, int how)
{
@@ -514,14 +565,6 @@
printf("%s: %p mbuf already has cluster\n", __func__, m);
m->m_ext.ext_buf = (char *)NULL;
uma_zalloc_arg(zone_clust, m, how);
- /*
- * On a cluster allocation failure, drain the packet zone and retry,
- * we might be able to loosen a few clusters up on the drain.
- */
- if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
- zone_drain(zone_pack);
- uma_zalloc_arg(zone_clust, m, how);
- }
}
/*
@@ -618,7 +661,7 @@
*/
#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \
(!(((m)->m_flags & M_EXT)) || \
- (*((m)->m_ext.ref_cnt) == 1)) ) \
+ !MEXT_IS_REF(m))) \
/* Check if the supplied mbuf has a packet header, or else panic. */
#define M_ASSERTPKTHDR(m) \
More information about the p4-projects
mailing list