git: d9dc1603d6e4 - main - libc: Implement N2630.

From: Dag-Erling Smørgrav <des_at_FreeBSD.org>
Date: Mon, 28 Aug 2023 15:38:21 UTC
The branch main has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=d9dc1603d6e48cca84cad3ebe859129131b8387c

commit d9dc1603d6e48cca84cad3ebe859129131b8387c
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2023-08-28 15:32:23 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2023-08-28 15:33:51 +0000

    libc: Implement N2630.
    
    This adds formatted input/output of binary integer numbers to the printf(), scanf(), and strtol() families, including their wide-character counterparts.
    
    Reviewed by:    imp, emaste
    Differential Revision:  https://reviews.freebsd.org/D41511
---
 lib/libc/iconv/_strtol.h      |   7 ++
 lib/libc/iconv/_strtoul.h     |   7 ++
 lib/libc/locale/wcstoimax.c   |   7 ++
 lib/libc/locale/wcstol.c      |   7 ++
 lib/libc/locale/wcstoll.c     |   7 ++
 lib/libc/locale/wcstoul.c     |   7 ++
 lib/libc/locale/wcstoull.c    |   7 ++
 lib/libc/locale/wcstoumax.c   |   7 ++
 lib/libc/stdio/printfcommon.h |  14 +++
 lib/libc/stdio/vfprintf.c     |  13 ++
 lib/libc/stdio/vfscanf.c      | 267 +++++++++++++++++++++++-------------------
 lib/libc/stdio/vfwprintf.c    |  13 ++
 lib/libc/stdio/vfwscanf.c     | 263 +++++++++++++++++++++++------------------
 lib/libc/stdlib/strtoimax.c   |   7 ++
 lib/libc/stdlib/strtol.c      |   7 ++
 lib/libc/stdlib/strtoll.c     |  12 +-
 lib/libc/stdlib/strtoul.c     |   7 ++
 lib/libc/stdlib/strtoull.c    |   7 ++
 lib/libc/stdlib/strtoumax.c   |   7 ++
 19 files changed, 436 insertions(+), 237 deletions(-)

diff --git a/lib/libc/iconv/_strtol.h b/lib/libc/iconv/_strtol.h
index d183edbe8c3a..94a13c56db98 100644
--- a/lib/libc/iconv/_strtol.h
+++ b/lib/libc/iconv/_strtol.h
@@ -91,6 +91,13 @@ _FUNCNAME(const char *nptr, char **endptr, int base)
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == '0' && (*s == 'b' || *s == 'B') &&
+	    (s[1] >= '0' && s[1] <= '1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = (c == '0' ? 8 : 10);
 
diff --git a/lib/libc/iconv/_strtoul.h b/lib/libc/iconv/_strtoul.h
index eade72e9c2e6..4944e1fb06e0 100644
--- a/lib/libc/iconv/_strtoul.h
+++ b/lib/libc/iconv/_strtoul.h
@@ -87,6 +87,13 @@ _FUNCNAME(const char *nptr, char **endptr, int base)
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == '0' && (*s == 'b' || *s == 'B') &&
+	    (s[1] >= '0' && s[1] <= '1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = (c == '0' ? 8 : 10);
 
diff --git a/lib/libc/locale/wcstoimax.c b/lib/libc/locale/wcstoimax.c
index 259faa2b011c..5ed949cd0531 100644
--- a/lib/libc/locale/wcstoimax.c
+++ b/lib/libc/locale/wcstoimax.c
@@ -86,6 +86,13 @@ wcstoimax_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstol.c b/lib/libc/locale/wcstol.c
index b0b787384f39..1678b615ca1c 100644
--- a/lib/libc/locale/wcstol.c
+++ b/lib/libc/locale/wcstol.c
@@ -80,6 +80,13 @@ wcstol_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr, int
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstoll.c b/lib/libc/locale/wcstoll.c
index ac07d6c6adbf..ef1e6ef58861 100644
--- a/lib/libc/locale/wcstoll.c
+++ b/lib/libc/locale/wcstoll.c
@@ -86,6 +86,13 @@ wcstoll_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstoul.c b/lib/libc/locale/wcstoul.c
index 9f58db799c0e..2c9c8820b1f6 100644
--- a/lib/libc/locale/wcstoul.c
+++ b/lib/libc/locale/wcstoul.c
@@ -80,6 +80,13 @@ wcstoul_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstoull.c b/lib/libc/locale/wcstoull.c
index cbc7253f884d..692eb90eef6b 100644
--- a/lib/libc/locale/wcstoull.c
+++ b/lib/libc/locale/wcstoull.c
@@ -86,6 +86,13 @@ wcstoull_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstoumax.c b/lib/libc/locale/wcstoumax.c
index 4380cccf2424..c4f2ec3aaf41 100644
--- a/lib/libc/locale/wcstoumax.c
+++ b/lib/libc/locale/wcstoumax.c
@@ -86,6 +86,13 @@ wcstoumax_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/stdio/printfcommon.h b/lib/libc/stdio/printfcommon.h
index ac5aed0a5fcd..411b778dc234 100644
--- a/lib/libc/stdio/printfcommon.h
+++ b/lib/libc/stdio/printfcommon.h
@@ -194,6 +194,13 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs)
 		} while (sval != 0);
 		break;
 
+	case 2:
+		do {
+			*--cp = to_char(val & 1);
+			val >>= 1;
+		} while (val);
+		break;
+
 	case 8:
 		do {
 			*--cp = to_char(val & 7);
@@ -244,6 +251,13 @@ __ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs)
 		} while (sval != 0);
 		break;
 
+	case 2:
+		do {
+			*--cp = to_char(val & 1);
+			val >>= 1;
+		} while (val);
+		break;
+
 	case 8:
 		do {
 			*--cp = to_char(val & 7);
diff --git a/lib/libc/stdio/vfprintf.c b/lib/libc/stdio/vfprintf.c
index ad655c5d78d4..5e5a9b5e31c1 100644
--- a/lib/libc/stdio/vfprintf.c
+++ b/lib/libc/stdio/vfprintf.c
@@ -613,6 +613,19 @@ reswitch:	switch (ch) {
 		case 'z':
 			flags |= SIZET;
 			goto rflag;
+		case 'B':
+		case 'b':
+			if (flags & INTMAX_SIZE)
+				ujval = UJARG();
+			else
+				ulval = UARG();
+			base = 2;
+			/* leading 0b/B only if non-zero */
+			if (flags & ALT &&
+			    (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0))
+				ox[1] = ch;
+			goto nosign;
+			break;
 		case 'C':
 			flags |= LONGINT;
 			/*FALLTHROUGH*/
diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c
index cc2e1e428321..b4db62c216ed 100644
--- a/lib/libc/stdio/vfscanf.c
+++ b/lib/libc/stdio/vfscanf.c
@@ -6,6 +6,8 @@
  *
  * Copyright (c) 2011 The FreeBSD Foundation
  *
+ * Copyright (c) 2023 Dag-Erling Smørgrav
+ *
  * Portions of this software were developed by David Chisnall
  * under sponsorship from the FreeBSD Foundation.
  *
@@ -80,16 +82,6 @@ static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
 #define	SHORTSHORT	0x4000	/* hh: char */
 #define	UNSIGNED	0x8000	/* %[oupxX] conversions */
 
-/*
- * The following are used in integral conversions only:
- * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
- */
-#define	SIGNOK		0x40	/* +/- is (still) legal */
-#define	NDIGITS		0x80	/* no digits detected */
-#define	PFXOK		0x100	/* 0x prefix is (still) legal */
-#define	NZDIGITS	0x200	/* no zero digits detected */
-#define	HAVESIGN	0x10000	/* sign detected */
-
 /*
  * Conversion types.
  */
@@ -307,129 +299,160 @@ convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
 	return (n);
 }
 
+enum parseint_state {
+	begin,		/* nothing accepted yet; initial state in parseint() */
+	havesign,	/* accepted only a leading '+' or '-' */
+	havezero,	/* accepted a first '0', possibly after a sign */
+	haveprefix,	/* accepted 'b'/'B'/'x'/'X' right after that first '0' */
+	any,		/* accepted some other digit; no sign or prefix may follow */
+};
+
+/*
+ * Advance the integer-scanning state machine by one input character.
+ *
+ * c is the candidate character, *state records what has been accepted
+ * so far, and *base is the conversion base, where 0 means that the
+ * base has not been determined yet.
+ *
+ * Returns 1 if c is accepted as part of the number, updating *state
+ * (and *base, once it becomes known).  Returns 0 if c is rejected, in
+ * which case neither *state nor *base has been modified.
+ */
+static __inline int
+parseint_fsm(int c, enum parseint_state *state, int *base)
+{
+	switch (c) {
+	case '+':
+	case '-':
+		/* a sign is only valid as the very first character */
+		if (*state == begin) {
+			*state = havesign;
+			return 1;
+		}
+		break;
+	case '0':
+		if (*state == begin || *state == havesign) {
+			/* first zero: a 0b / 0x prefix may still follow */
+			*state = havezero;
+			return 1;
+		}
+		/*
+		 * A zero right after the first zero rules out a prefix.
+		 * If the base is still undetermined the number has to
+		 * be octal; leaving the base at 0 would make every
+		 * following digit fail the "*base > digit" test, so
+		 * that "007" would be cut short after "00".
+		 */
+		if (*state == havezero && *base == 0)
+			*base = 8;
+		*state = any;
+		return 1;
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		/* a leading zero followed by an octal digit selects octal */
+		if (*state == havezero && *base == 0)
+			*base = 8;
+		/* FALL THROUGH */
+	case '8':
+	case '9':
+		/* a number that starts with a non-zero digit is decimal */
+		if ((*state == begin || *state == havesign) && *base == 0)
+			*base = 10;
+		/* digits are legal in every state, subject to the base */
+		if (*base > c - '0') {
+			*state = any;
+			return 1;
+		}
+		break;
+	case 'b':
+		/* "0b" is a prefix unless the base makes 'b' a plain digit */
+		if (*state == havezero && (*base == 0 || *base == 2)) {
+			*state = haveprefix;
+			*base = 2;
+			return 1;
+		}
+		/* FALL THROUGH */
+	case 'a':
+	case 'c':
+	case 'd':
+	case 'e':
+	case 'f':
+		if (*base > c - 'a' + 10) {
+			*state = any;
+			return 1;
+		}
+		break;
+	case 'B':
+		/* "0B" is a prefix unless the base makes 'B' a plain digit */
+		if (*state == havezero && (*base == 0 || *base == 2)) {
+			*state = haveprefix;
+			*base = 2;
+			return 1;
+		}
+		/* FALL THROUGH */
+	case 'A':
+	case 'C':
+	case 'D':
+	case 'E':
+	case 'F':
+		if (*base > c - 'A' + 10) {
+			*state = any;
+			return 1;
+		}
+		break;
+	case 'x':
+	case 'X':
+		/* only valid as a prefix letter directly after the first zero */
+		if (*state == havezero && (*base == 0 || *base == 16)) {
+			*state = haveprefix;
+			*base = 16;
+			return 1;
+		}
+		break;
+	}
+	return 0;
+}
+
 /*
- * Read an integer, storing it in buf.  The only relevant bit in the
- * flags argument is PFXOK.
+ * Read an integer, storing it in buf.
+ *
+ * Characters are taken from fp one at a time and offered to
+ * parseint_fsm() until it rejects one, the stream reports EOF, or
+ * width characters have been stored.  base is the conversion base,
+ * with 0 meaning that parseint_fsm() determines it from the input.
  *
  * Return 0 on a match failure, and the number of characters read
  * otherwise.
  */
 static __inline int
-parseint(FILE *fp, char * __restrict buf, int width, int base, int flags)
+parseint(FILE *fp, char * __restrict buf, int width, int base)
 {
-	/* `basefix' is used to avoid `if' tests */
-	static const short basefix[17] =
-		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+	enum parseint_state state = begin;
 	char *p;
 	int c;
 
-	flags |= SIGNOK | NDIGITS | NZDIGITS;
 	for (p = buf; width; width--) {
-		c = *fp->_p;
-		/*
-		 * Switch on the character; `goto ok' if we accept it
-		 * as a part of number.
-		 */
-		switch (c) {
-
-		/*
-		 * The digit 0 is always legal, but is special.  For
-		 * %i conversions, if no digits (zero or nonzero) have
-		 * been scanned (only signs), we will have base==0.
-		 * In that case, we should set it to 8 and enable 0x
-		 * prefixing.  Also, if we have not scanned zero
-		 * digits before this, do not turn off prefixing
-		 * (someone else will turn it off if we have scanned
-		 * any nonzero digits).
-		 */
-		case '0':
-			if (base == 0) {
-				base = 8;
-				flags |= PFXOK;
-			}
-			if (flags & NZDIGITS)
-				flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
-			else
-				flags &= ~(SIGNOK|PFXOK|NDIGITS);
-			goto ok;
-
-		/* 1 through 7 always legal */
-		case '1': case '2': case '3':
-		case '4': case '5': case '6': case '7':
-			base = basefix[base];
-			flags &= ~(SIGNOK | PFXOK | NDIGITS);
-			goto ok;
-
-		/* digits 8 and 9 ok iff decimal or hex */
-		case '8': case '9':
-			base = basefix[base];
-			if (base <= 8)
-				break;	/* not legal here */
-			flags &= ~(SIGNOK | PFXOK | NDIGITS);
-			goto ok;
-
-		/* letters ok iff hex */
-		case 'A': case 'B': case 'C':
-		case 'D': case 'E': case 'F':
-		case 'a': case 'b': case 'c':
-		case 'd': case 'e': case 'f':
-			/* no need to fix base here */
-			if (base <= 10)
-				break;	/* not legal here */
-			flags &= ~(SIGNOK | PFXOK | NDIGITS);
-			goto ok;
-
-		/* sign ok only as first character */
-		case '+': case '-':
-			if (flags & SIGNOK) {
-				flags &= ~SIGNOK;
-				flags |= HAVESIGN;
-				goto ok;
-			}
+		c = __sgetc(fp);
+		if (c == EOF)
 			break;
-
-		/*
-		 * x ok iff flag still set & 2nd char (or 3rd char if
-		 * we have a sign).
-		 */
-		case 'x': case 'X':
-			if (flags & PFXOK && p ==
-			    buf + 1 + !!(flags & HAVESIGN)) {
-				base = 16;	/* if %i */
-				flags &= ~PFXOK;
-				goto ok;
-			}
+		if (!parseint_fsm(c, &state, &base))
 			break;
-		}
-
-		/*
-		 * If we got here, c is not a legal character for a
-		 * number.  Stop accumulating digits.
-		 */
-		break;
-	ok:
-		/*
-		 * c is legal: store it and look at the next.
-		 */
 		*p++ = c;
-		if (--fp->_r > 0)
-			fp->_p++;
-		else if (__srefill(fp))
-			break;		/* EOF */
 	}
 	/*
-	 * If we had only a sign, it is no good; push back the sign.
-	 * If the number ends in `x', it was [sign] '0' 'x', so push
-	 * back the x and treat it as [sign] '0'.
+	 * If we only had a sign, push it back.  If we only had a 0b or 0x
+	 * prefix (possibly preceded by a sign), we view it as "0" and
+	 * drop the letter from buf.  In all other cases, if we stopped
+	 * because we read a non-number character, push it back.
+	 *
+	 * width is still non-zero only if the loop was left early.  If it
+	 * reached zero, c is the last character stored in buf and must
+	 * not be pushed back, or it would be read a second time.
+	 *
+	 * NOTE(review): when the loop stopped on a rejected character,
+	 * the havesign branch pushes back the sign but not the rejected
+	 * character, and the haveprefix branch pushes back the rejected
+	 * character but not the prefix letter -- confirm this is intended.
 	 */
-	if (flags & NDIGITS) {
-		if (p > buf)
-			(void) __ungetc(*(u_char *)--p, fp);
-		return (0);
-	}
-	c = ((u_char *)p)[-1];
-	if (c == 'x' || c == 'X') {
-		--p;
+	if (state == havesign) {
+		p--;
+		(void) __ungetc(*(u_char *)p, fp);
+	} else if (state == haveprefix) {
+		p--;
+		(void) __ungetc(c, fp);
+	} else if (width && c != EOF) {
 		(void) __ungetc(c, fp);
 	}
 	return (p - buf);
@@ -554,6 +577,13 @@ literal:
 		/*
 		 * Conversions.
 		 */
+		case 'B':
+		case 'b':
+			c = CT_INT;
+			flags |= UNSIGNED;
+			base = 2;
+			break;
+
 		case 'd':
 			c = CT_INT;
 			base = 10;
@@ -578,7 +608,6 @@ literal:
 
 		case 'X':
 		case 'x':
-			flags |= PFXOK;	/* enable 0x prefixing */
 			c = CT_INT;
 			flags |= UNSIGNED;
 			base = 16;
@@ -613,7 +642,7 @@ literal:
 			break;
 
 		case 'p':	/* pointer format is like hex */
-			flags |= POINTER | PFXOK;
+			flags |= POINTER;
 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
 			base = 16;
@@ -738,7 +767,7 @@ literal:
 				width = sizeof(buf) - 2;
 			width++;
 #endif
-			nr = parseint(fp, buf, width, base, flags);
+			nr = parseint(fp, buf, width, base);
 			if (nr == 0)
 				goto match_failure;
 			if ((flags & SUPPRESS) == 0) {
diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c
index fc681e8d0575..259a86467ea7 100644
--- a/lib/libc/stdio/vfwprintf.c
+++ b/lib/libc/stdio/vfwprintf.c
@@ -684,6 +684,19 @@ reswitch:	switch (ch) {
 		case 'z':
 			flags |= SIZET;
 			goto rflag;
+		case 'B':
+		case 'b':
+			if (flags & INTMAX_SIZE)
+				ujval = UJARG();
+			else
+				ulval = UARG();
+			base = 2;
+			/* leading 0b/B only if non-zero */
+			if (flags & ALT &&
+			    (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0))
+				ox[1] = ch;
+			goto nosign;
+			break;
 		case 'C':
 			flags |= LONGINT;
 			/*FALLTHROUGH*/
diff --git a/lib/libc/stdio/vfwscanf.c b/lib/libc/stdio/vfwscanf.c
index 1a28ff665247..3300751eafb6 100644
--- a/lib/libc/stdio/vfwscanf.c
+++ b/lib/libc/stdio/vfwscanf.c
@@ -9,6 +9,8 @@
  *
  * Copyright (c) 2011 The FreeBSD Foundation
  *
+ * Copyright (c) 2023 Dag-Erling Smørgrav
+ *
  * Portions of this software were developed by David Chisnall
  * under sponsorship from the FreeBSD Foundation.
  *
@@ -78,16 +80,6 @@ static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
 #define	SHORTSHORT	0x4000	/* hh: char */
 #define	UNSIGNED	0x8000	/* %[oupxX] conversions */
 
-/*
- * The following are used in integral conversions only:
- * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
- */
-#define	SIGNOK		0x40	/* +/- is (still) legal */
-#define	NDIGITS		0x80	/* no digits detected */
-#define	PFXOK		0x100	/* 0x prefix is (still) legal */
-#define	NZDIGITS	0x200	/* no zero digits detected */
-#define	HAVESIGN	0x10000	/* sign detected */
-
 /*
  * Conversion types.
  */
@@ -289,128 +281,161 @@ convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
 	return (nread);
 }
 
+enum parseint_state {
+	begin,		/* nothing accepted yet; initial state in parseint() */
+	havesign,	/* accepted only a leading '+' or '-' */
+	havezero,	/* accepted a first '0', possibly after a sign */
+	haveprefix,	/* accepted 'b'/'B'/'x'/'X' right after that first '0' */
+	any,		/* accepted some other digit; no sign or prefix may follow */
+};
+
+/*
+ * Advance the integer-scanning state machine by one wide character.
+ *
+ * c is the candidate character, *state records what has been accepted
+ * so far, and *base is the conversion base, where 0 means that the
+ * base has not been determined yet.
+ *
+ * Returns 1 if c is accepted as part of the number, updating *state
+ * (and *base, once it becomes known).  Returns 0 if c is rejected, in
+ * which case neither *state nor *base has been modified.
+ */
+static __inline int
+parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
+{
+	switch (c) {
+	case '+':
+	case '-':
+		/* a sign is only valid as the very first character */
+		if (*state == begin) {
+			*state = havesign;
+			return 1;
+		}
+		break;
+	case '0':
+		if (*state == begin || *state == havesign) {
+			/* first zero: a 0b / 0x prefix may still follow */
+			*state = havezero;
+			return 1;
+		}
+		/*
+		 * A zero right after the first zero rules out a prefix.
+		 * If the base is still undetermined the number has to
+		 * be octal; leaving the base at 0 would make every
+		 * following digit fail the "*base > digit" test, so
+		 * that "007" would be cut short after "00".
+		 */
+		if (*state == havezero && *base == 0)
+			*base = 8;
+		*state = any;
+		return 1;
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		/* a leading zero followed by an octal digit selects octal */
+		if (*state == havezero && *base == 0)
+			*base = 8;
+		/* FALL THROUGH */
+	case '8':
+	case '9':
+		/* a number that starts with a non-zero digit is decimal */
+		if ((*state == begin || *state == havesign) && *base == 0)
+			*base = 10;
+		/* digits are legal in every state, subject to the base */
+		if (*base > c - '0') {
+			*state = any;
+			return 1;
+		}
+		break;
+	case 'b':
+		/* "0b" is a prefix unless the base makes 'b' a plain digit */
+		if (*state == havezero && (*base == 0 || *base == 2)) {
+			*state = haveprefix;
+			*base = 2;
+			return 1;
+		}
+		/* FALL THROUGH */
+	case 'a':
+	case 'c':
+	case 'd':
+	case 'e':
+	case 'f':
+		if (*base > c - 'a' + 10) {
+			*state = any;
+			return 1;
+		}
+		break;
+	case 'B':
+		/* "0B" is a prefix unless the base makes 'B' a plain digit */
+		if (*state == havezero && (*base == 0 || *base == 2)) {
+			*state = haveprefix;
+			*base = 2;
+			return 1;
+		}
+		/* FALL THROUGH */
+	case 'A':
+	case 'C':
+	case 'D':
+	case 'E':
+	case 'F':
+		if (*base > c - 'A' + 10) {
+			*state = any;
+			return 1;
+		}
+		break;
+	case 'x':
+	case 'X':
+		/* only valid as a prefix letter directly after the first zero */
+		if (*state == havezero && (*base == 0 || *base == 16)) {
+			*state = haveprefix;
+			*base = 16;
+			return 1;
+		}
+		break;
+	}
+	return 0;
+}
+
 /*
- * Read an integer, storing it in buf.  The only relevant bit in the
- * flags argument is PFXOK.
+ * Read an integer, storing it in buf.
+ *
+ * Wide characters are taken from fp one at a time (using locale) and
+ * offered to parseint_fsm() until it rejects one, the stream reports
+ * WEOF, or width characters have been stored.  base is the conversion
+ * base, with 0 meaning that parseint_fsm() determines it from the
+ * input.
  *
  * Return 0 on a match failure, and the number of characters read
  * otherwise.
  */
 static __inline int
-parseint(FILE *fp, wchar_t *buf, int width, int base, int flags,
+parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
     locale_t locale)
 {
-	/* `basefix' is used to avoid `if' tests */
-	static const short basefix[17] =
-		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+	enum parseint_state state = begin;
 	wchar_t *wcp;
 	int c;
 
-	flags |= SIGNOK | NDIGITS | NZDIGITS;
 	for (wcp = buf; width; width--) {
 		c = __fgetwc(fp, locale);
-		/*
-		 * Switch on the character; `goto ok' if we accept it
-		 * as a part of number.
-		 */
-		switch (c) {
-
-		/*
-		 * The digit 0 is always legal, but is special.  For
-		 * %i conversions, if no digits (zero or nonzero) have
-		 * been scanned (only signs), we will have base==0.
-		 * In that case, we should set it to 8 and enable 0x
-		 * prefixing.  Also, if we have not scanned zero
-		 * digits before this, do not turn off prefixing
-		 * (someone else will turn it off if we have scanned
-		 * any nonzero digits).
-		 */
-		case '0':
-			if (base == 0) {
-				base = 8;
-				flags |= PFXOK;
-			}
-			if (flags & NZDIGITS)
-				flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
-			else
-				flags &= ~(SIGNOK|PFXOK|NDIGITS);
-			goto ok;
-
-		/* 1 through 7 always legal */
-		case '1': case '2': case '3':
-		case '4': case '5': case '6': case '7':
-			base = basefix[base];
-			flags &= ~(SIGNOK | PFXOK | NDIGITS);
-			goto ok;
-
-		/* digits 8 and 9 ok iff decimal or hex */
-		case '8': case '9':
-			base = basefix[base];
-			if (base <= 8)
-				break;	/* not legal here */
-			flags &= ~(SIGNOK | PFXOK | NDIGITS);
-			goto ok;
-
-		/* letters ok iff hex */
-		case 'A': case 'B': case 'C':
-		case 'D': case 'E': case 'F':
-		case 'a': case 'b': case 'c':
-		case 'd': case 'e': case 'f':
-			/* no need to fix base here */
-			if (base <= 10)
-				break;	/* not legal here */
-			flags &= ~(SIGNOK | PFXOK | NDIGITS);
-			goto ok;
-
-		/* sign ok only as first character */
-		case '+': case '-':
-			if (flags & SIGNOK) {
-				flags &= ~SIGNOK;
-				flags |= HAVESIGN;
-				goto ok;
-			}
+		if (c == WEOF)
 			break;
-
-		/*
-		 * x ok iff flag still set & 2nd char (or 3rd char if
-		 * we have a sign).
-		 */
-		case 'x': case 'X':
-			if (flags & PFXOK && wcp ==
-			    buf + 1 + !!(flags & HAVESIGN)) {
-				base = 16;	/* if %i */
-				flags &= ~PFXOK;
-				goto ok;
-			}
+		if (!parseint_fsm(c, &state, &base))
 			break;
-		}
-
-		/*
-		 * If we got here, c is not a legal character for a
-		 * number.  Stop accumulating digits.
-		 */
-		if (c != WEOF)
-			__ungetwc(c, fp, locale);
-		break;
-	ok:
-		/*
-		 * c is legal: store it and look at the next.
-		 */
 		*wcp++ = (wchar_t)c;
 	}
 	/*
-	 * If we had only a sign, it is no good; push back the sign.
-	 * If the number ends in `x', it was [sign] '0' 'x', so push
-	 * back the x and treat it as [sign] '0'.
+	 * If we only had a sign, push it back.  If we only had a 0b or 0x
+	 * prefix (possibly preceded by a sign), we view it as "0" and
+	 * drop the letter from buf.  In all other cases, if we stopped
+	 * because we read a non-number character, push it back.
+	 *
+	 * width is still non-zero only if the loop was left early.  If it
+	 * reached zero, c is the last character stored in buf and must
+	 * not be pushed back, or it would be read a second time.
+	 *
+	 * NOTE(review): when the loop stopped on a rejected character,
+	 * the havesign branch pushes back the sign but not the rejected
+	 * character, and the haveprefix branch pushes back the rejected
+	 * character but not the prefix letter -- confirm this is intended.
 	 */
-	if (flags & NDIGITS) {
-		if (wcp > buf)
-			__ungetwc(*--wcp, fp, locale);
-		return (0);
-	}
-	c = wcp[-1];
-	if (c == 'x' || c == 'X') {
-		--wcp;
+	if (state == havesign) {
+		wcp--;
+		__ungetwc(*wcp, fp, locale);
+	} else if (state == haveprefix) {
+		wcp--;
+		__ungetwc(c, fp, locale);
+	} else if (width && c != WEOF) {
 		__ungetwc(c, fp, locale);
 	}
 	return (wcp - buf);
@@ -536,6 +561,13 @@ literal:
 		/*
 		 * Conversions.
 		 */
+		case 'B':
+		case 'b':
+			c = CT_INT;
+			flags |= UNSIGNED;
+			base = 2;
+			break;
+
 		case 'd':
 			c = CT_INT;
 			base = 10;
@@ -560,7 +592,6 @@ literal:
 
 		case 'X':
 		case 'x':
-			flags |= PFXOK;	/* enable 0x prefixing */
 			c = CT_INT;
 			flags |= UNSIGNED;
 			base = 16;
@@ -606,7 +637,7 @@ literal:
 			break;
 
 		case 'p':	/* pointer format is like hex */
-			flags |= POINTER | PFXOK;
+			flags |= POINTER;
 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
 			base = 16;
@@ -716,7 +747,7 @@ literal:
 			    sizeof(*buf) - 1)
 				width = sizeof(buf) / sizeof(*buf) - 1;
 
-			nr = parseint(fp, buf, width, base, flags, locale);
+			nr = parseint(fp, buf, width, base, locale);
 			if (nr == 0)
 				goto match_failure;
 			if ((flags & SUPPRESS) == 0) {
diff --git a/lib/libc/stdlib/strtoimax.c b/lib/libc/stdlib/strtoimax.c
index 894d801940fd..5309b7d4305c 100644
--- a/lib/libc/stdlib/strtoimax.c
+++ b/lib/libc/stdlib/strtoimax.c
@@ -87,6 +87,13 @@ strtoimax_l(const char * __restrict nptr, char ** __restrict endptr, int base,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == '0' && (*s == 'b' || *s == 'B') &&
+	    (s[1] >= '0' && s[1] <= '1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == '0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/stdlib/strtol.c b/lib/libc/stdlib/strtol.c
index 360bb7efc8be..1ca95918ef12 100644
--- a/lib/libc/stdlib/strtol.c
+++ b/lib/libc/stdlib/strtol.c
@@ -87,6 +87,13 @@ strtol_l(const char * __restrict nptr, char ** __restrict endptr, int base,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == '0' && (*s == 'b' || *s == 'B') &&
+	    (s[1] >= '0' && s[1] <= '1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == '0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/stdlib/strtoll.c b/lib/libc/stdlib/strtoll.c
index 51a523e51fb8..6845776c5f03 100644
--- a/lib/libc/stdlib/strtoll.c
+++ b/lib/libc/stdlib/strtoll.c
@@ -63,8 +63,9 @@ strtoll_l(const char * __restrict nptr, char ** __restrict endptr, int base,
 
 	/*
 	 * Skip white space and pick up leading +/- sign if any.
-	 * If base is 0, allow 0x for hex and 0 for octal, else
-	 * assume decimal; if base is already 16, allow 0x.
+	 * If base is 0, allow 0b for binary, 0x for hex, and 0 for
+	 * octal, else assume decimal; if base is already 2, allow
+	 * 0b; if base is already 16, allow 0x.
 	 */
 	s = nptr;
 	do {
@@ -87,6 +88,13 @@ strtoll_l(const char * __restrict nptr, char ** __restrict endptr, int base,
*** 67 LINES SKIPPED ***