TCP and syncache question
Andre Oppermann
andre at freebsd.org
Thu Nov 20 02:59:22 PST 2008
Rui Paulo wrote:
> On 17 Nov 2008, at 22:40, Andre Oppermann wrote:
>> This is a bit more complicated because of interactions with tcp_input()
>> where syncache_expand() is called from.
>>
>> The old code (as of December 2002) behaved slightly different. It would
>> not remove the syncache entry when (SND.UNA == SEG.ACK) but send a RST.
>> The (RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND) test wasn't done at
>> all. Instead a socket was opened whenever (SND.UNA == SEG.ACK)
>> succeeded.
>> This gave way to the "LAND" DoS attack which was mostly fixed with a test
>> for (RCV.IRS < SEG.SEQ).
>>
>> See the attached patch for fixed version of syncache_expand(). This
>> patch
>> is untested though. My development machine is currently down. Harti,
>> Rui
>> and Bjoern, please have a look at the patch and review it.
>>
>> --Andre
>>
>> --- tcp_input.c.1.390 Mon Nov 17 21:33:25 2008
>> +++ tcp_input.c.1.390.mod Mon Nov 17 21:35:22 2008
>> @@ -642,10 +642,13 @@ findpcb:
>> if (!syncache_expand(&inc, &to, th, &so, m)) {
>> /*
>> * No syncache entry or ACK was not
>> - * for our SYN/ACK. Send a RST.
>> + * for our SYN/ACK. Send a RST or
>> + * an ACK for re-synchronization.
>> * NB: syncache did its own logging
>> * of the failure cause.
>> */
>> + if (so == 1)
>> + goto dropunlock;
>> rstreason = BANDLIM_RST_OPENPORT;
>> goto dropwithreset;
>> }
>> --- tcp_syncache.c.1.160 Mon Nov 17 16:49:01 2008
>> +++ tcp_syncache.c.1.160.mod Mon Nov 17 23:35:39 2008
>> @@ -817,59 +817,47 @@ syncache_expand(struct in_conninfo *inc,
>> INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
>> KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
>> ("%s: can handle only ACK", __func__));
>> + *lsop = NULL;
>>
>> sc = syncache_lookup(inc, &sch); /* returns locked sch */
>> SCH_LOCK_ASSERT(sch);
>> if (sc == NULL) {
>> /*
>> * There is no syncache entry, so see if this ACK is
>> - * a returning syncookie. To do this, first:
>> - * A. See if this socket has had a syncache entry dropped in
>> - * the past. We don't want to accept a bogus syncookie
>> - * if we've never received a SYN.
>> - * B. check that the syncookie is valid. If it is, then
>> - * cobble up a fake syncache entry, and return.
>> + * a returning syncookie. If the syncookie is valid,
>> + * cobble up a fake syncache entry and create a socket.
>> + *
>> + * NB: Syncache head is locked for the syncookie access.
>> */
>> if (!tcp_syncookies) {
>> - SCH_UNLOCK(sch);
>> if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
>> log(LOG_DEBUG, "%s; %s: Spurious ACK, "
>> "segment rejected (syncookies disabled)\n",
>> s, __func__);
>> - goto failed;
>> + goto sendrst;
>> }
>> bzero(&scs, sizeof(scs));
>> sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
>> - SCH_UNLOCK(sch);
>> if (sc == NULL) {
>> if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
>> log(LOG_DEBUG, "%s; %s: Segment failed "
>> "SYNCOOKIE authentication, segment rejected "
>> "(probably spoofed)\n", s, __func__);
>> - goto failed;
>> + goto sendrst;
>> }
>> - } else {
>> - /* Pull out the entry to unlock the bucket row. */
>> - TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
>> - sch->sch_length--;
>> - V_tcp_syncache.cache_count--;
>> - SCH_UNLOCK(sch);
>> + goto expand; /* fully validated through syncookie */
>> }
>> + SCH_LOCK_ASSERT(sch);
>
> Why do you need this assert?
Removed. Was from an earlier iteration with different locking and gotos.
>> /*
>> * Segment validation:
>> - * ACK must match our initial sequence number + 1 (the SYN|ACK).
>> - */
>> - if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
>> - if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
>> - log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
>> - "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
>> - goto failed;
>> - }
>> -
>> - /*
>> + *
>> * The SEQ must fall in the window starting at the received
>> * initial receive sequence number + 1 (the SYN).
>> + * If not the segment may be from an earlier connection. We send
>> + * an ACK to re-synchronize the connection and keep the syncache
>> + * entry without ajusting its timers.
>> + * See RFC793 page 69, first check sequence number [SYN_RECEIVED].
>> */
>> if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
>> SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
>> @@ -877,14 +865,41 @@ syncache_expand(struct in_conninfo *inc,
>> if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
>> log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
>> "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
>> - goto failed;
>> + (void) syncache_respond(sc);
>> + *lsop = 1; /* prevent RST */
>> + goto sendrstkeep;
>> }
>>
>> + /*
>> + * ACK must match our initial sequence number + 1 (the SYN|ACK).
>> + * If not the segment may be from an earlier connection. We send
>> + * a RST but keep the syncache entry without ajusting its timers.
>> + * See RFC793 page 72, fifth check the ACK field, [SYN_RECEIVED].
>> + */
>> + if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
>> + if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
>> + log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
>> + "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
>> + goto sendrstkeep;
>> + }
>> +
>> + /*
>> + * Remove the entry to unlock the bucket row.
>> + * Tests from now on are fatal and remove the syncache entry.
>> + */
>> + TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
>> + sch->sch_length--;
>> + V_tcp_syncache.cache_count--;
>> +
>> + /*
>> + * If timestamps were not negotiated they must not show up later.
>> + * See RFC1312bis, section 1.3, second paragraph
>> + */
>> if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
>> if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
>> log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
>> "segment rejected\n", s, __func__);
>> - goto failed;
>> + goto sendrst;
>> }
>> /*
>> * If timestamps were negotiated the reflected timestamp
>> @@ -896,9 +911,11 @@ syncache_expand(struct in_conninfo *inc,
>> log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
>> "segment rejected\n",
>> s, __func__, to->to_tsecr, sc->sc_ts);
>> - goto failed;
>> + goto sendrst;
>> }
>>
>> +expand:
>> + SCH_UNLOCK(sch);
>> *lsop = syncache_socket(sc, *lsop, m);
>>
>> if (*lsop == NULL)
>> @@ -906,16 +923,18 @@ syncache_expand(struct in_conninfo *inc,
>> else
>> V_tcpstat.tcps_sc_completed++;
>>
>> -/* how do we find the inp for the new socket? */
>> if (sc != &scs)
>> syncache_free(sc);
>> return (1);
>> -failed:
>> - if (sc != NULL && sc != &scs)
>> +
>> +sendrst:
>> + if (sc != &scs)
>> syncache_free(sc);
>> +sendrstkeep:
>> + SCH_LOCK_ASSERT(sch);
>> + SCH_UNLOCK(sch);
>
> Why do we need an assert before an unlock?
This is customary in at least the TCP code. A failed assert provides more
and better diagnostics than a crash-and-burn failed unlock.
>> if (s != NULL)
>> free(s, M_TCPLOG);
>> - *lsop = NULL;
>> return (0);
>> }
>>
>
> This was probably out of scope:
yep. Put it in while reading the code.
>> @@ -1322,6 +1341,8 @@ syncache_respond(struct syncache *sc)
>> *
>> * 1) path_mtu_discovery is disabled
>> * 2) the SCF_UNREACH flag has been set
>> + *
>> + * XXXAO: The route lookup comment doesn't make sense.
>> */
>> if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
>> ip->ip_off |= IP_DF;
>
>
>
> I won't be able to test this any time soon, so I can't really comment on
> the rest, but it *looks* okay.
--
Andre
More information about the freebsd-net
mailing list