svn commit: r347699 - stable/12/sys/kern
Konstantin Belousov
kib at FreeBSD.org
Thu May 16 14:39:49 UTC 2019
Author: kib
Date: Thu May 16 14:39:48 2019
New Revision: 347699
URL: https://svnweb.freebsd.org/changeset/base/347699
Log:
MFC r343985, r344133, r345273 (by bde):
Prevent overflow for usertime/systime in calcru1().
PR: 76972
Modified:
stable/12/sys/kern/kern_resource.c
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/sys/kern/kern_resource.c
==============================================================================
--- stable/12/sys/kern/kern_resource.c Thu May 16 14:33:32 2019 (r347698)
+++ stable/12/sys/kern/kern_resource.c Thu May 16 14:39:48 2019 (r347699)
@@ -863,6 +863,90 @@ rufetchtd(struct thread *td, struct rusage *ru)
calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
}
+/* XXX: the MI version is too slow to use: */
+#ifndef __HAVE_INLINE_FLSLL
+#define flsll(x) (fls((x) >> 32) != 0 ? fls((x) >> 32) + 32 : fls(x))
+#endif
+
+static uint64_t
+mul64_by_fraction(uint64_t a, uint64_t b, uint64_t c)
+{
+ uint64_t acc, bh, bl;
+ int i, s, sa, sb;
+
+ /*
+ * Calculate (a * b) / c accurately enough without overflowing. c
+ * must be nonzero, and its top bit must be 0. a or b must be
+ * <= c, and the implementation is tuned for b <= c.
+ *
+ * The comments about times are for use in calcru1() with units of
+ * microseconds for 'a' and stathz ticks at 128 Hz for b and c.
+ *
+ * Let n be the number of top zero bits in c. Each iteration
+ * either returns, or reduces b by right shifting it by at least n.
+ * The number of iterations is at most 1 + 64 / n, and the error is
+ * at most the number of iterations.
+ *
+ * It is very unusual to need even 2 iterations. Previous
+ * implementations overflowed essentially by returning early in the
+ * first iteration, with n = 38 giving overflow at 105+ hours and
+ * n = 32 giving overflow at 388+ days despite a more careful
+ * calculation. 388 days is a reasonable uptime, and the calculation
+ * needs to work for the uptime times the number of CPUs since 'a'
+ * is per-process.
+ */
+ if (a >= (uint64_t)1 << 63)
+ return (0); /* Unsupported arg -- can't happen. */
+ acc = 0;
+ for (i = 0; i < 128; i++) {
+ sa = flsll(a);
+ sb = flsll(b);
+ if (sa + sb <= 64)
+ /* Up to 105 hours on first iteration. */
+ return (acc + (a * b) / c);
+ if (a >= c) {
+ /*
+ * This reduction is based on a = q * c + r, with the
+ * remainder r < c. 'a' may be large to start, and
+ * moving bits from b into 'a' at the end of the loop
+ * sets the top bit of 'a', so the reduction makes
+ * significant progress.
+ */
+ acc += (a / c) * b;
+ a %= c;
+ sa = flsll(a);
+ if (sa + sb <= 64)
+ /* Up to 388 days on first iteration. */
+ return (acc + (a * b) / c);
+ }
+
+ /*
+ * This step writes a * b as a * ((bh << s) + bl) =
+ * a * (bh << s) + a * bl = (a << s) * bh + a * bl. The 2
+ * additive terms are handled separately. Splitting in
+ * this way is linear except for rounding errors.
+ *
+ * s = 64 - sa is the maximum such that a << s fits in 64
+ * bits. Since a < c and c has at least 1 zero top bit,
+ * sa < 64 and s > 0. Thus this step makes progress by
+ * reducing b (it increases 'a', but taking remainders on
+ * the next iteration completes the reduction).
+ *
+ * Finally, the choice for s is just what is needed to keep
+ * a * bl from overflowing, so we don't need complications
+ * like a recursive call mul64_by_fraction(a, bl, c) to
+ * handle the second additive term.
+ */
+ s = 64 - sa;
+ bh = b >> s;
+ bl = b - (bh << s);
+ acc += (a * bl) / c;
+ a <<= s;
+ b = bh;
+ }
+ return (0); /* Algorithm failure -- can't happen. */
+}
+
static void
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
struct timeval *sp)
@@ -887,15 +971,23 @@ calcru1(struct proc *p, struct rusage_ext *ruxp, struc
tu = ruxp->rux_tu;
}
+ /* Subdivide tu. Avoid overflow in the multiplications. */
+ if (__predict_true(tu <= ((uint64_t)1 << 38) && tt <= (1 << 26))) {
+ /* Up to 76 hours when stathz is 128. */
+ uu = (tu * ut) / tt;
+ su = (tu * st) / tt;
+ } else {
+ uu = mul64_by_fraction(tu, ut, tt);
+ su = mul64_by_fraction(tu, st, tt);
+ }
+
if (tu >= ruxp->rux_tu) {
/*
* The normal case, time increased.
* Enforce monotonicity of bucketed numbers.
*/
- uu = (tu * ut) / tt;
if (uu < ruxp->rux_uu)
uu = ruxp->rux_uu;
- su = (tu * st) / tt;
if (su < ruxp->rux_su)
su = ruxp->rux_su;
} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
@@ -924,8 +1016,6 @@ calcru1(struct proc *p, struct rusage_ext *ruxp, struc
"to %ju usec for pid %d (%s)\n",
(uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
p->p_pid, p->p_comm);
- uu = (tu * ut) / tt;
- su = (tu * st) / tt;
}
ruxp->rux_uu = uu;
More information about the svn-src-all
mailing list