[run] [panic] [patch] Workaround for use-after-free panic
Juergen Lock
nox at jelal.kn-bremen.de
Wed Jan 12 20:16:05 UTC 2011
>Submitter-Id: current-users
>Originator: Juergen Lock
>Organization: me? organized??
>Confidential: no
>Synopsis: [run] [panic] [patch] Workaround for use-after-free panic
>Severity:
>Priority:
>Category: kern
>Class: sw-bug
>Release: FreeBSD 8.1-RC2 amd64
>Environment:
System: FreeBSD triton8.kn-bremen.de 8.1-RC2 FreeBSD 8.1-RC2 #9: Wed Sep 1 21:53:36 CEST 2010 nox at triton8.kn-bremen.de:/usr/obj/data2v/home/nox/src-r81/src/sys/TRITON8U amd64
Yes, this is an older stable/8 checkout, but if_run(4) is
checked out from head.
>Description:
Running the NIC in hostap mode with WPA2, I got the following
crash once every few weeks:
#0 doadump () at pcpu.h:223
223 pcpu.h: No such file or directory.
in pcpu.h
(kgdb) bt
#0 doadump () at pcpu.h:223
#1 0xffffffff805f0719 in boot (howto=260)
at /data2v/home/nox/src-r81/src/sys/kern/kern_shutdown.c:416
#2 0xffffffff805f0b6c in panic (fmt=Variable "fmt" is not available.
)
at /data2v/home/nox/src-r81/src/sys/kern/kern_shutdown.c:590
#3 0xffffffff808e4e0d in trap_fatal (frame=0xc, eva=Variable "eva" is not available.
)
at /data2v/home/nox/src-r81/src/sys/amd64/amd64/trap.c:777
#4 0xffffffff808e51f4 in trap_pfault (frame=0xffffff80ec121aa0, usermode=0)
at /data2v/home/nox/src-r81/src/sys/amd64/amd64/trap.c:693
#5 0xffffffff808e5a7e in trap (frame=0xffffff80ec121aa0)
at /data2v/home/nox/src-r81/src/sys/amd64/amd64/trap.c:451
#6 0xffffffff808ca953 in calltrap ()
at /data2v/home/nox/src-r81/src/sys/amd64/amd64/exception.S:223
#7 0xffffffff81072ac6 in run_drain_fifo (arg=Variable "arg" is not available.
)
at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2245
#8 0xffffffff81072bc3 in run_ratectl_cb (arg=Variable "arg" is not available.
)
at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2210
#9 0xffffffff8062e543 in taskqueue_run (queue=0xffffff0005f42380)
at /data2v/home/nox/src-r81/src/sys/kern/subr_taskqueue.c:239
#10 0xffffffff8062e7c6 in taskqueue_thread_loop (arg=Variable "arg" is not available.
)
at /data2v/home/nox/src-r81/src/sys/kern/subr_taskqueue.c:360
---Type <return> to continue, or q <return> to quit---
#11 0xffffffff805c64a8 in fork_exit (
callout=0xffffffff8062e780 <taskqueue_thread_loop>,
arg=0xffffff8000b130b8, frame=0xffffff80ec121c80)
at /data2v/home/nox/src-r81/src/sys/kern/kern_fork.c:844
#12 0xffffffff808cae2e in fork_trampoline ()
at /data2v/home/nox/src-r81/src/sys/amd64/amd64/exception.S:562
#13 0x0000000000000000 in ?? ()
#14 0x0000000000000000 in ?? ()
#15 0x0000000000000000 in ?? ()
#16 0x0000000000000000 in ?? ()
#17 0x0000000000000000 in ?? ()
#18 0x0000000000000000 in ?? ()
#19 0x0000000000000000 in ?? ()
#20 0x0000000000000000 in ?? ()
#21 0x0000000000000000 in ?? ()
#22 0x0000000000000000 in ?? ()
#23 0x0000000000000000 in ?? ()
#24 0x0000000000000000 in ?? ()
#25 0x0000000000000000 in ?? ()
#26 0x0000000000000000 in ?? ()
#27 0x0000000000000000 in ?? ()
#28 0x0000000000000000 in ?? ()
#29 0x0000000000000000 in ?? ()
---Type <return> to continue, or q <return> to quit---
#30 0x0000000000000000 in ?? ()
#31 0x0000000000000000 in ?? ()
#32 0x0000000000000000 in ?? ()
#33 0x0000000000000000 in ?? ()
#34 0x0000000000000000 in ?? ()
#35 0x0000000000000000 in ?? ()
#36 0x0000000000000000 in ?? ()
#37 0x0000000000f37000 in ?? ()
#38 0x0000000000000000 in ?? ()
#39 0xffffff00078c47c0 in ?? ()
#40 0xffffffff80cac9c0 in affinity ()
#41 0xffffff00018837c0 in ?? ()
#42 0xffffff80ec121710 in ?? ()
#43 0xffffff80ec1216c8 in ?? ()
#44 0xffffff00078c47c0 in ?? ()
#45 0xffffffff8061471a in sched_switch (td=0xffffff8000b130b8,
newtd=0xffffffff8062e780, flags=Variable "flags" is not available.
)
at /data2v/home/nox/src-r81/src/sys/kern/sched_ule.c:1844
Previous frame inner to this frame (corrupt stack?)
(kgdb) fr 7
#7 0xffffffff81072ac6 in run_drain_fifo (arg=Variable "arg" is not available.
)
at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2245
2245 ni = sc->sc_ni[wcid];
(kgdb) p wcid
$1 = 1 '\001'
(kgdb) p sc->sc_ni
$2 = {0x0, 0xffffff8001676000, 0x0 <repeats 63 times>}
(kgdb) p sc->sc_ni[1]
$3 = (struct ieee80211_node *) 0xffffff8001676000
(kgdb) p *sc->sc_ni[1]
Cannot access memory at address 0xffffff8001676000
(kgdb) up
#8 0xffffffff81072bc3 in run_ratectl_cb (arg=Variable "arg" is not available.
)
at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2210
2210 run_drain_fifo(sc);
(kgdb) p sc->sc_ni
$4 = {0x0, 0xffffff8001676000, 0x0 <repeats 63 times>}
(kgdb) l run_drain_fifo
2216 usb_callout_reset(&sc->ratectl_ch, hz, run_ratectl_to, sc);
2217 }
2218
2219 static void
2220 run_drain_fifo(void *arg)
2221 {
2222 struct run_softc *sc = arg;
2223 struct ifnet *ifp = sc->sc_ifp;
2224 struct ieee80211_node *ni = sc->sc_ni[0]; /* make compiler happy */
2225 uint32_t stat;
(kgdb) l
2226 int retrycnt = 0;
2227 uint8_t wcid, mcs, pid;
2228
2229 RUN_LOCK_ASSERT(sc, MA_OWNED);
2230
2231 for (;;) {
2232 /* drain Tx status FIFO (maxsize = 16) */
2233 run_read(sc, RT2860_TX_STAT_FIFO, &stat);
2234 DPRINTFN(4, "tx stat 0x%08x\n", stat);
2235 if (!(stat & RT2860_TXQ_VLD))
(kgdb)
2236 break;
2237
2238 wcid = (stat >> RT2860_TXQ_WCID_SHIFT) & 0xff;
2239
2240 /* if no ACK was requested, no feedback is available */
2241 if (!(stat & RT2860_TXQ_ACKREQ) || wcid > RT2870_WCID_MAX ||
2242 wcid == 0)
2243 continue;
2244
2245 ni = sc->sc_ni[wcid];
(kgdb)
2246 if (ni->ni_rctls == NULL)
2247 continue;
2248
2249 /* update per-STA AMRR stats */
2250 if (stat & RT2860_TXQ_OK) {
2251 /*
2252 * Check if there were retries, ie if the Tx
2253 * success rate is different from the requested
2254 * rate. Note that it works only because we do
2255 * not allow rate fallback from OFDM to CCK.
(kgdb)
2256 */
2257 mcs = (stat >> RT2860_TXQ_MCS_SHIFT) & 0x7f;
2258 pid = (stat >> RT2860_TXQ_PID_SHIFT) & 0xf;
2259 if (mcs + 1 != pid)
2260 retrycnt = 1;
2261 ieee80211_ratectl_tx_complete(ni->ni_vap, ni,
2262 IEEE80211_RATECTL_TX_SUCCESS,
2263 &retrycnt, NULL);
2264 } else {
2265 retrycnt = 1;
(kgdb)
2266 ieee80211_ratectl_tx_complete(ni->ni_vap, ni,
2267 IEEE80211_RATECTL_TX_FAILURE,
2268 &retrycnt, NULL);
2269 ifp->if_oerrors++;
2270 }
2271 }
2272 DPRINTFN(3, "count=%d\n", sc->fifo_cnt);
2273
2274 sc->fifo_cnt = 0;
2275 }
(kgdb) up
#9 0xffffffff8062e543 in taskqueue_run (queue=0xffffff0005f42380)
at /data2v/home/nox/src-r81/src/sys/kern/subr_taskqueue.c:239
239 task->ta_func(task->ta_context, pending);
(kgdb) p task
$5 = (struct task *) 0xffffff8000a8be38
(kgdb) p *task
$6 = {ta_link = {stqe_next = 0x0}, ta_pending = 0, ta_priority = 0,
ta_func = 0xffffffff81072b60 <run_ratectl_cb>,
ta_context = 0xffffff8000a89000}
(kgdb) l run_ratectl_cb
2184 }
2185
2186 /* ARGSUSED */
2187 static void
2188 run_ratectl_cb(void *arg, int pending)
2189 {
2190 struct run_softc *sc = arg;
2191 struct ieee80211com *ic = sc->sc_ifp->if_l2com;
2192 struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
2193
(kgdb) down
#8 0xffffffff81072bc3 in run_ratectl_cb (arg=Variable "arg" is not available.
)
at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2210
2210 run_drain_fifo(sc);
(kgdb) l run_ratectl_cb
2184 }
2185
2186 /* ARGSUSED */
2187 static void
2188 run_ratectl_cb(void *arg, int pending)
2189 {
2190 struct run_softc *sc = arg;
2191 struct ieee80211com *ic = sc->sc_ifp->if_l2com;
2192 struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
2193
(kgdb) l
2194 if (vap == NULL)
2195 return;
2196
2197 if (sc->rvp_cnt <= 1 && vap->iv_opmode == IEEE80211_M_STA)
2198 run_iter_func(sc, vap->iv_bss);
2199 else {
2200 /*
2201 * run_reset_livelock() doesn't do anything with AMRR,
2202 * but Ralink wants us to call it every 1 sec. So, we
2203 * piggyback here rather than creating another callout.
(kgdb) p sc->rvp_cnt
$7 = 1 '\001'
(kgdb) l
2204 * Livelock may occur only in HOSTAP or IBSS mode
2205 * (when h/w is sending beacons).
2206 */
2207 RUN_LOCK(sc);
2208 run_reset_livelock(sc);
2209 /* just in case, there are some stats to drain */
2210 run_drain_fifo(sc);
2211 RUN_UNLOCK(sc);
2212 ieee80211_iterate_nodes(&ic->ic_sta, run_iter_func, sc);
2213 }
(kgdb) down
#7 0xffffffff81072ac6 in run_drain_fifo (arg=Variable "arg" is not available.
)
at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2245
2245 ni = sc->sc_ni[wcid];
(kgdb) up
#8 0xffffffff81072bc3 in run_ratectl_cb (arg=Variable "arg" is not available.
)
at /data2v/home/nox/src-r81/src/sys/modules/usb/run/../../../dev/usb/wlan/if_run.c:2210
2210 run_drain_fifo(sc);
(kgdb) l
2205 * (when h/w is sending beacons).
2206 */
2207 RUN_LOCK(sc);
2208 run_reset_livelock(sc);
2209 /* just in case, there are some stats to drain */
2210 run_drain_fifo(sc);
2211 RUN_UNLOCK(sc);
2212 ieee80211_iterate_nodes(&ic->ic_sta, run_iter_func, sc);
2213 }
2214
(kgdb) l
2215 if(sc->ratectl_run != RUN_RATECTL_OFF)
2216 usb_callout_reset(&sc->ratectl_ch, hz, run_ratectl_to, sc);
2217 }
2218
2219 static void
2220 run_drain_fifo(void *arg)
2221 {
2222 struct run_softc *sc = arg;
2223 struct ifnet *ifp = sc->sc_ifp;
2224 struct ieee80211_node *ni = sc->sc_ni[0]; /* make compiler happy */
(kgdb)
2225 uint32_t stat;
2226 int retrycnt = 0;
2227 uint8_t wcid, mcs, pid;
2228
2229 RUN_LOCK_ASSERT(sc, MA_OWNED);
2230
2231 for (;;) {
2232 /* drain Tx status FIFO (maxsize = 16) */
2233 run_read(sc, RT2860_TX_STAT_FIFO, &stat);
2234 DPRINTFN(4, "tx stat 0x%08x\n", stat);
(kgdb) p sc->fifo_cnt
$8 = 1 '\001'
(kgdb) l
2235 if (!(stat & RT2860_TXQ_VLD))
2236 break;
2237
2238 wcid = (stat >> RT2860_TXQ_WCID_SHIFT) & 0xff;
2239
2240 /* if no ACK was requested, no feedback is available */
2241 if (!(stat & RT2860_TXQ_ACKREQ) || wcid > RT2870_WCID_MAX ||
2242 wcid == 0)
2243 continue;
2244
(kgdb) l
2245 ni = sc->sc_ni[wcid];
2246 if (ni->ni_rctls == NULL)
2247 continue;
2248
2249 /* update per-STA AMRR stats */
2250 if (stat & RT2860_TXQ_OK) {
2251 /*
2252 * Check if there were retries, ie if the Tx
2253 * success rate is different from the requested
2254 * rate. Note that it works only because we do
(kgdb) p vap->iv_opmode
Variable "vap" is not available.
(kgdb) p ic->ic_vaps
$9 = {tqh_first = 0xffffff0007800000, tqh_last = 0xffffff0007800048}
(kgdb) p ic->ic_vaps->tqh_first
$10 = (struct ieee80211vap *) 0xffffff0007800000
(kgdb) p ic->ic_vaps->tqh_first->iv_opmode
$11 = IEEE80211_M_HOSTAP
(kgdb) p ic->ic_vaps->tqh_last->iv_opmode
Cannot access memory at address 0x2f0
(kgdb) p ic->ic_vaps->tqh_last
$12 = (struct ieee80211vap **) 0xffffff0007800048
(kgdb) p *ic->ic_vaps->tqh_last
$13 = (struct ieee80211vap *) 0x0
(kgdb) q
Script done on Tue Jan 4 09:23:48 2011
>How-To-Repeat:
Set up if_run(4) in hostap mode, then wait a few weeks...
(I only have one smartphone using the wifi, maybe if
you have a bigger network it'll happen more often?)
>Fix:
I don't really know the wifi code, so the following patch
is likely not the `proper' fix (and it still contains
diagnostic code that shouldn't be committed as is), but at
least it fixed the panic for me: I finally got the
run0: drain_fifo ni=NULL wcid=1
message that I added for the condition that previously caused
the panic, and the NIC kept working. (The panic happened
when run_drain_fifo() accessed sc->sc_ni[wcid] after that node
had been freed, so I hooked into ic->ic_node_cleanup
to set the sc_ni[] entry to NULL before the node gets freed, and
added a NULL check with the above message to run_drain_fifo().)
Index: src/sys/dev/usb/wlan/if_run.c
===================================================================
RCS file: /home/scvs/src/sys/dev/usb/wlan/if_run.c,v
retrieving revision 1.17
diff -u -p -r1.17 if_run.c
--- src/sys/dev/usb/wlan/if_run.c 6 Nov 2010 18:17:20 -0000 1.17
+++ src/sys/dev/usb/wlan/if_run.c 7 Jan 2011 00:58:35 -0000
@@ -341,6 +341,7 @@ static const char *run_get_rf(int);
static int run_read_eeprom(struct run_softc *);
static struct ieee80211_node *run_node_alloc(struct ieee80211vap *,
const uint8_t mac[IEEE80211_ADDR_LEN]);
+static void run_node_cleanup(struct ieee80211_node *ni);
static int run_media_change(struct ifnet *);
static int run_newstate(struct ieee80211vap *, enum ieee80211_state, int);
static int run_wme_update(struct ieee80211com *);
@@ -673,6 +674,8 @@ run_attach(device_t self)
ic->ic_scan_end = run_scan_end;
ic->ic_set_channel = run_set_channel;
ic->ic_node_alloc = run_node_alloc;
+ sc->sc_node_cleanup = ic->ic_node_cleanup;
+ ic->ic_node_cleanup = run_node_cleanup;
ic->ic_newassoc = run_newassoc;
//ic->ic_updateslot = run_updateslot;
ic->ic_update_mcast = run_update_mcast;
@@ -2243,7 +2246,14 @@ run_drain_fifo(void *arg)
continue;
ni = sc->sc_ni[wcid];
- if (ni->ni_rctls == NULL)
+#if 1
+ static struct ieee80211_node *lastni;
+ if (ni == NULL && lastni)
+ device_printf(sc->sc_dev, "drain_fifo ni=NULL wcid=%d\n",
+ wcid);
+ lastni = ni;
+#endif
+ if (ni == NULL || ni->ni_rctls == NULL)
continue;
/* update per-STA AMRR stats */
@@ -2373,10 +2383,12 @@ run_newassoc(struct ieee80211_node *ni,
ieee80211_runtask(ic, &sc->cmdq_task);
}
- DPRINTF("new assoc isnew=%d associd=%x addr=%s\n",
- isnew, ni->ni_associd, ether_sprintf(ni->ni_macaddr));
+ //DPRINTF("new assoc isnew=%d associd=%x addr=%s\n",
+ device_printf(sc->sc_dev, "new assoc isnew=%d associd=%x addr=%s ni=%p\n",
+ isnew, ni->ni_associd, ether_sprintf(ni->ni_macaddr), ni);
ieee80211_ratectl_node_init(ni);
+ rn->wcid = wcid;
sc->sc_ni[wcid] = ni;
for (i = 0; i < rs->rs_nrates; i++) {
@@ -2412,6 +2424,39 @@ run_newassoc(struct ieee80211_node *ni,
usb_callout_reset(&sc->ratectl_ch, hz, run_ratectl_to, sc);
}
+static void
+run_node_cleanup(struct ieee80211_node *ni)
+{
+ struct run_node *rn = (void *)ni;
+ struct ieee80211vap *vap = ni->ni_vap;
+ struct ieee80211com *ic = vap->iv_ic;
+ struct run_softc *sc = ic->ic_ifp->if_softc;
+ uint8_t wcid = RUN_AID2WCID(ni->ni_associd);
+
+ if (wcid == 0)
+ wcid = rn->wcid;
+ if (wcid > RT2870_WCID_MAX) {
+ device_printf(sc->sc_dev, "wcid=%d out of range\n", wcid);
+ sc->sc_node_cleanup(ni);
+ return;
+ }
+
+ //DPRINTF("node_cleanup wcid=%d addr=%s\n",
+ device_printf(sc->sc_dev, "node_cleanup wcid=%d addr=%s ni=%p\n",
+ wcid, ether_sprintf(vap->iv_opmode == IEEE80211_M_STA ?
+ vap->iv_myaddr : ni->ni_macaddr), ni);
+
+ if (wcid > 0 && sc->sc_ni[wcid]) {
+ if (sc->sc_ni[wcid] != ni) {
+ device_printf(sc->sc_dev, "node_cleanup sc->sc_ni[wcid] %p != ni\n",
+ sc->sc_ni[wcid]);
+ } else {
+ sc->sc_ni[wcid] = NULL;
+ }
+ }
+ sc->sc_node_cleanup(ni);
+}
+
/*
* Return the Rx chain with the highest RSSI for a given frame.
*/
Index: src/sys/dev/usb/wlan/if_runvar.h
===================================================================
RCS file: /home/scvs/src/sys/dev/usb/wlan/if_runvar.h,v
retrieving revision 1.6
diff -u -p -r1.6 if_runvar.h
--- src/sys/dev/usb/wlan/if_runvar.h 14 Jun 2010 00:40:23 -0000 1.6
+++ src/sys/dev/usb/wlan/if_runvar.h 4 Jan 2011 08:48:13 -0000
@@ -106,6 +106,7 @@ struct run_node {
uint8_t amrr_ridx;
uint8_t mgt_ridx;
uint8_t fix_ridx;
+ uint8_t wcid;
};
struct run_cmdq {
@@ -164,6 +165,8 @@ struct run_softc {
int (*sc_srom_read)(struct run_softc *,
uint16_t, uint16_t *);
+ void (*sc_node_cleanup)(struct ieee80211_node *);
+
uint16_t mac_ver;
uint16_t mac_rev;
uint8_t rf_rev;
More information about the freebsd-net
mailing list