git: 2c74c9dac3a6 - main - cxgbe: Compute timestamps via sbintime_t.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 26 Sep 2022 22:10:41 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=2c74c9dac3a6e74a2c33d519cdcf1de145e7664c

commit 2c74c9dac3a6e74a2c33d519cdcf1de145e7664c
Author: John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2022-09-26 21:57:26 +0000
Commit: John Baldwin <jhb@FreeBSD.org>
CommitDate: 2022-09-26 21:58:30 +0000

cxgbe: Compute timestamps via sbintime_t.

This uses fixed-point math already used elsewhere in the kernel for sub-second time values. To avoid overflows this does require updating the calibration once a second rather than once every 30 seconds. Note that the cxgbe driver already queries multiple registers once a second for the statistics timers. This version also uses fewer instructions with no branches (for the math portion) in the per-packet fast path.

Reviewed by: np
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D36663
---
sys/dev/cxgbe/adapter.h | 4 ++--
sys/dev/cxgbe/t4_main.c | 39 ++++++---------------------------------
sys/dev/cxgbe/t4_sge.c | 41 +++++------------------------------------
3 files changed, 13 insertions(+), 71 deletions(-)

diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 4080f04246c2..9312549cf7ba 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -869,8 +869,8 @@ struct clip_entry; struct clock_sync { uint64_t hw_cur; uint64_t hw_prev; - uint64_t rt_cur; - uint64_t rt_prev; + sbintime_t sbt_cur; + sbintime_t sbt_prev; uint32_t gen; }; diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 6547d199ca02..9f982ca32097 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -314,18 +314,6 @@ static int t4_rsrv_noflowq = 0; SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq, 0, "Reserve TX queue 0 of each VI for non-flowid packets"); -static int t4_clocksync_fast = 1; -SYSCTL_INT(_hw_cxgbe, OID_AUTO, csfast, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_fast, 0, - "During initial clock sync how 
fast do we update in seconds"); - -static int t4_clocksync_normal = 30; -SYSCTL_INT(_hw_cxgbe, OID_AUTO, csnormal, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_normal, 0, - "During normal clock sync how fast do we update in seconds"); - -static int t4_fast_2_normal = 30; -SYSCTL_INT(_hw_cxgbe, OID_AUTO, cscount, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_fast_2_normal, 0, - "How many clock syncs do we need to do to transition to slow"); - #if defined(TCP_OFFLOAD) || defined(RATELIMIT) #define NOFLDTXQ 8 static int t4_nofldtxq = -NOFLDTXQ; @@ -1121,17 +1109,10 @@ t4_ifnet_unit(struct adapter *sc, struct port_info *pi) return (-1); } -static inline uint64_t -t4_get_ns_timestamp(struct timespec *ts) -{ - return ((ts->tv_sec * 1000000000) + ts->tv_nsec); -} - static void t4_calibration(void *arg) { struct adapter *sc; - struct timespec ts; struct clock_sync *cur, *nex; int next_up; @@ -1143,17 +1124,15 @@ t4_calibration(void *arg) if (__predict_false(sc->cal_count == 0)) { /* First time in, just get the values in */ cur->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO); - nanouptime(&ts); - cur->rt_cur = t4_get_ns_timestamp(&ts); + cur->sbt_cur = sbinuptime(); sc->cal_count++; goto done; } nex->hw_prev = cur->hw_cur; - nex->rt_prev = cur->rt_cur; - KASSERT((hw_off_limits(sc) == 0), ("hw_off_limits at t4_calibtration")); + nex->sbt_prev = cur->sbt_cur; + KASSERT((hw_off_limits(sc) == 0), ("hw_off_limits at t4_calibration")); nex->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO); - nanouptime(&ts); - nex->rt_cur = t4_get_ns_timestamp(&ts); + nex->sbt_cur = sbinuptime(); if ((nex->hw_cur - nex->hw_prev) == 0) { /* The clock is not advancing? */ sc->cal_count = 0; @@ -1164,17 +1143,11 @@ t4_calibration(void *arg) sc->cal_current = next_up; sc->cal_gen++; atomic_store_rel_int(&nex->gen, sc->cal_gen); - if (sc->cal_count < t4_fast_2_normal) - sc->cal_count++; done: - callout_reset_sbt_curcpu(&sc->cal_callout, - ((sc->cal_count < t4_fast_2_normal) ? 
- t4_clocksync_fast : t4_clocksync_normal) * SBT_1S, 0, - t4_calibration, sc, C_DIRECT_EXEC); + callout_reset_sbt_curcpu(&sc->cal_callout, SBT_1S, 0, t4_calibration, + sc, C_DIRECT_EXEC); } - - static void t4_calibration_start(struct adapter *sc) { diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index e0b73ccd8b51..161a753cc4ee 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -1526,10 +1526,9 @@ static inline uint64_t t4_tstmp_to_ns(struct adapter *sc, uint64_t lf) { struct clock_sync *cur, dcur; - uint64_t tstmp_sec, tstmp_nsec; uint64_t hw_clocks; - uint64_t rt_cur_to_prev, res_s, res_n, res_s_modulo, res; - uint64_t hw_clk_div, cclk; + uint64_t hw_clk_div; + sbintime_t sbt_cur_to_prev, sbt; uint64_t hw_tstmp = lf & 0xfffffffffffffffULL; /* 60b, not 64b. */ uint32_t gen; @@ -1551,42 +1550,12 @@ t4_tstmp_to_ns(struct adapter *sc, uint64_t lf) * * With the constraints that we cannot use float and we * don't want to overflow the uint64_t numbers we are using. - * - * The plan is to take the clocking value of the hw timestamps - * and split them into seconds and nanosecond equivalent portions. - * Then we operate on the two portions seperately making sure to - * bring back the carry over from the seconds when we divide. - * - * First up lets get the two divided into separate entities - * i.e. the seconds. We use the clock frequency for this. - * Note that vpd.cclk is in khz, we need it in raw hz so - * convert to hz. 
*/ - cclk = (uint64_t)sc->params.vpd.cclk * 1000; hw_clocks = hw_tstmp - dcur.hw_prev; - tstmp_sec = hw_clocks / cclk; - tstmp_nsec = hw_clocks % cclk; - /* Now work with them separately */ - rt_cur_to_prev = (dcur.rt_cur - dcur.rt_prev); - res_s = tstmp_sec * rt_cur_to_prev; - res_n = tstmp_nsec * rt_cur_to_prev; - /* Now lets get our divider */ + sbt_cur_to_prev = (dcur.sbt_cur - dcur.sbt_prev); hw_clk_div = dcur.hw_cur - dcur.hw_prev; - /* Make sure to save the remainder from the seconds divide */ - res_s_modulo = res_s % hw_clk_div; - res_s /= hw_clk_div; - /* scale the remainder to where it should be */ - res_s_modulo *= cclk; - /* Now add in the remainder */ - res_n += res_s_modulo; - /* Now do the divide */ - res_n /= hw_clk_div; - res_s *= cclk; - /* Recombine the two */ - res = res_s + res_n; - /* And now add in the base time to get to the real timestamp */ - res += dcur.rt_prev; - return (res); + sbt = hw_clocks * sbt_cur_to_prev / hw_clk_div + dcur.sbt_prev; + return (sbttons(sbt)); } static inline void