git: 71ec05a21257 - main - sort: Cache value of MB_CUR_MAX

Mark Johnston markj at FreeBSD.org
Thu May 13 13:35:19 UTC 2021


The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=71ec05a21257e159f40d54e26ad0011bb19b5134

commit 71ec05a21257e159f40d54e26ad0011bb19b5134
Author:     Cyril Zhang <cyril at freebsdfoundation.org>
AuthorDate: 2021-05-13 12:55:06 +0000
Commit:     Mark Johnston <markj at FreeBSD.org>
CommitDate: 2021-05-13 13:33:19 +0000

    sort: Cache value of MB_CUR_MAX
    
    Every use of MB_CUR_MAX results in a call to __mb_cur_max, which is
    inefficient and redundant.  Caching the value in a global variable,
    set in set_locale() right after setlocale(), removes these calls and
    reduces the runtime of sort.  For numeric sorting, runtime is almost
    halved in some tests.
    
    PR:             255551
    PR:             255840
    Reviewed by:    markj
    MFC after:      1 week
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D30170
---
 usr.bin/sort/bwstring.c  | 54 ++++++++++++++++++++++++------------------------
 usr.bin/sort/bwstring.h  |  9 ++++----
 usr.bin/sort/radixsort.c |  4 ++--
 usr.bin/sort/sort.c      |  6 +++++-
 usr.bin/sort/sort.h      |  6 ++++++
 5 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/usr.bin/sort/bwstring.c b/usr.bin/sort/bwstring.c
index cab21324c4b3..f6200c53c83e 100644
--- a/usr.bin/sort/bwstring.c
+++ b/usr.bin/sort/bwstring.c
@@ -59,7 +59,7 @@ initialise_months(void)
 	char *tmp;
 	size_t len;
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		if (cmonths == NULL) {
 			char *m;
 
@@ -143,7 +143,7 @@ void
 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
 {
 
-	if (MB_CUR_MAX == 1)
+	if (mb_cur_max == 1)
 		fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
 	else
 		fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
@@ -158,14 +158,14 @@ const void* bwsrawdata(const struct bwstring *bws)
 size_t bwsrawlen(const struct bwstring *bws)
 {
 
-	return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len));
+	return ((mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len));
 }
 
 size_t
 bws_memsize(const struct bwstring *bws)
 {
 
-	return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
+	return ((mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
 	    (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)));
 }
 
@@ -175,7 +175,7 @@ bws_setlen(struct bwstring *bws, size_t newlen)
 
 	if (bws && newlen != bws->len && newlen <= bws->len) {
 		bws->len = newlen;
-		if (MB_CUR_MAX == 1)
+		if (mb_cur_max == 1)
 			bws->data.cstr[newlen] = '\0';
 		else
 			bws->data.wstr[newlen] = L'\0';
@@ -190,14 +190,14 @@ bwsalloc(size_t sz)
 {
 	struct bwstring *ret;
 
-	if (MB_CUR_MAX == 1)
+	if (mb_cur_max == 1)
 		ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
 	else
 		ret = sort_malloc(sizeof(struct bwstring) +
 		    SIZEOF_WCHAR_STRING(sz + 1));
 	ret->len = sz;
 
-	if (MB_CUR_MAX == 1)
+	if (mb_cur_max == 1)
 		ret->data.cstr[ret->len] = '\0';
 	else
 		ret->data.wstr[ret->len] = L'\0';
@@ -218,7 +218,7 @@ bwsdup(const struct bwstring *s)
 	else {
 		struct bwstring *ret = bwsalloc(s->len);
 
-		if (MB_CUR_MAX == 1)
+		if (mb_cur_max == 1)
 			memcpy(ret->data.cstr, s->data.cstr, (s->len));
 		else
 			memcpy(ret->data.wstr, s->data.wstr,
@@ -242,7 +242,7 @@ bwssbdup(const wchar_t *str, size_t len)
 
 		ret = bwsalloc(len);
 
-		if (MB_CUR_MAX == 1)
+		if (mb_cur_max == 1)
 			for (size_t i = 0; i < len; ++i)
 				ret->data.cstr[i] = (unsigned char) str[i];
 		else
@@ -263,7 +263,7 @@ bwscsbdup(const unsigned char *str, size_t len)
 	ret = bwsalloc(len);
 
 	if (str) {
-		if (MB_CUR_MAX == 1)
+		if (mb_cur_max == 1)
 			memcpy(ret->data.cstr, str, len);
 		else {
 			mbstate_t mbs;
@@ -277,7 +277,7 @@ bwscsbdup(const unsigned char *str, size_t len)
 			memset(&mbs, 0, sizeof(mbs));
 
 			while (cptr < len) {
-				size_t n = MB_CUR_MAX;
+				size_t n = mb_cur_max;
 
 				if (n > len - cptr)
 					n = len - cptr;
@@ -334,7 +334,7 @@ bwscpy(struct bwstring *dst, const struct bwstring *src)
 		nums = dst->len;
 	dst->len = nums;
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		memcpy(dst->data.cstr, src->data.cstr, nums);
 		dst->data.cstr[dst->len] = '\0';
 	} else {
@@ -363,7 +363,7 @@ bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
 		nums = size;
 	dst->len = nums;
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		memcpy(dst->data.cstr, src->data.cstr, nums);
 		dst->data.cstr[dst->len] = '\0';
 	} else {
@@ -398,7 +398,7 @@ bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
 		if (nums > size)
 			nums = size;
 		dst->len = nums;
-		if (MB_CUR_MAX == 1) {
+		if (mb_cur_max == 1) {
 			memcpy(dst->data.cstr, src->data.cstr + offset,
 			    (nums));
 			dst->data.cstr[dst->len] = '\0';
@@ -420,7 +420,7 @@ size_t
 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
 {
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		size_t len = bws->len;
 
 		if (!zero_ended) {
@@ -478,7 +478,7 @@ bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
 
 	eols = zero_ended ? btowc('\0') : btowc('\n');
 
-	if (!zero_ended && (MB_CUR_MAX > 1)) {
+	if (!zero_ended && (mb_cur_max > 1)) {
 		wchar_t *ret;
 
 		ret = fgetwln(f, len);
@@ -494,7 +494,7 @@ bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
 		}
 		return (bwssbdup(ret, *len));
 
-	} else if (!zero_ended && (MB_CUR_MAX == 1)) {
+	} else if (!zero_ended && (mb_cur_max == 1)) {
 		char *ret;
 
 		ret = fgetln(f, len);
@@ -523,7 +523,7 @@ bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
 		}
 		rb->fgetwln_z_buffer[*len] = 0;
 
-		if (MB_CUR_MAX == 1)
+		if (mb_cur_max == 1)
 			while (!feof(f)) {
 				int c;
 
@@ -603,7 +603,7 @@ bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
 			if (len < cmp_len)
 				cmp_len = len;
 
-			if (MB_CUR_MAX == 1) {
+			if (mb_cur_max == 1) {
 				const unsigned char *s1, *s2;
 
 				s1 = bws1->data.cstr + offset;
@@ -696,7 +696,7 @@ bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
 			len1 -= offset;
 			len2 -= offset;
 
-			if (MB_CUR_MAX == 1) {
+			if (mb_cur_max == 1) {
 				const unsigned char *s1, *s2;
 
 				s1 = bws1->data.cstr + offset;
@@ -852,7 +852,7 @@ bwstod(struct bwstring *s0, bool *empty)
 {
 	double ret = 0;
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		unsigned char *end, *s;
 		char *ep;
 
@@ -909,7 +909,7 @@ int
 bws_month_score(const struct bwstring *s0)
 {
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		const unsigned char *end, *s;
 
 		s = s0->data.cstr;
@@ -949,7 +949,7 @@ struct bwstring *
 ignore_leading_blanks(struct bwstring *str)
 {
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		unsigned char *dst, *end, *src;
 
 		src = str->data.cstr;
@@ -1005,7 +1005,7 @@ ignore_nonprinting(struct bwstring *str)
 {
 	size_t newlen = str->len;
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		unsigned char *dst, *end, *src;
 		unsigned char c;
 
@@ -1058,7 +1058,7 @@ dictionary_order(struct bwstring *str)
 {
 	size_t newlen = str->len;
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		unsigned char *dst, *end, *src;
 		unsigned char c;
 
@@ -1109,7 +1109,7 @@ struct bwstring *
 ignore_case(struct bwstring *str)
 {
 
-	if (MB_CUR_MAX == 1) {
+	if (mb_cur_max == 1) {
 		unsigned char *end, *s;
 
 		s = str->data.cstr;
@@ -1137,7 +1137,7 @@ void
 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
 {
 
-	if (MB_CUR_MAX == 1)
+	if (mb_cur_max == 1)
 		warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
 	else
 		warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
diff --git a/usr.bin/sort/bwstring.h b/usr.bin/sort/bwstring.h
index 9d62eaf37f62..b63bb97ab93f 100644
--- a/usr.bin/sort/bwstring.h
+++ b/usr.bin/sort/bwstring.h
@@ -38,6 +38,7 @@
 #include <sysexits.h>
 #include <wchar.h>
 
+#include "sort.h"
 #include "mem.h"
 
 extern bool byte_sort;
@@ -109,7 +110,7 @@ static inline bwstring_iterator
 bws_end(struct bwstring *bws)
 {
 
-	return ((MB_CUR_MAX == 1) ?
+	return ((mb_cur_max == 1) ?
 	    (bwstring_iterator) (bws->data.cstr + bws->len) :
 	    (bwstring_iterator) (bws->data.wstr + bws->len));
 }
@@ -118,7 +119,7 @@ static inline bwstring_iterator
 bws_iterator_inc(bwstring_iterator iter, size_t pos)
 {
 
-	if (MB_CUR_MAX == 1)
+	if (mb_cur_max == 1)
 		return ((unsigned char *) iter) + pos;
 	else
 		return ((wchar_t*) iter) + pos;
@@ -128,7 +129,7 @@ static inline wchar_t
 bws_get_iter_value(bwstring_iterator iter)
 {
 
-	if (MB_CUR_MAX == 1)
+	if (mb_cur_max == 1)
 		return *((unsigned char *) iter);
 	else
 		return *((wchar_t*) iter);
@@ -137,7 +138,7 @@ bws_get_iter_value(bwstring_iterator iter)
 int
 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len);
 
-#define	BWS_GET(bws, pos) ((MB_CUR_MAX == 1) ? ((bws)->data.cstr[(pos)]) : (bws)->data.wstr[(pos)])
+#define	BWS_GET(bws, pos) ((mb_cur_max == 1) ? ((bws)->data.cstr[(pos)]) : (bws)->data.wstr[(pos)])
 
 void initialise_months(void);
 
diff --git a/usr.bin/sort/radixsort.c b/usr.bin/sort/radixsort.c
index 4993566aeb77..4c448fad69e9 100644
--- a/usr.bin/sort/radixsort.c
+++ b/usr.bin/sort/radixsort.c
@@ -258,7 +258,7 @@ add_leaf(struct sort_level *sl, struct sort_list_item *item)
 static inline int
 get_wc_index(struct sort_list_item *sli, size_t level)
 {
-	const size_t wcfact = (MB_CUR_MAX == 1) ? 1 : sizeof(wchar_t);
+	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
 	const struct key_value *kv;
 	const struct bwstring *bws;
 
@@ -331,7 +331,7 @@ free_sort_level(struct sort_level *sl)
 static void
 run_sort_level_next(struct sort_level *sl)
 {
-	const size_t wcfact = (MB_CUR_MAX == 1) ? 1 : sizeof(wchar_t);
+	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
 	struct sort_level *slc;
 	size_t i, sln, tosort_num;
 
diff --git a/usr.bin/sort/sort.c b/usr.bin/sort/sort.c
index d5c18552d914..0fbbd9284aad 100644
--- a/usr.bin/sort/sort.c
+++ b/usr.bin/sort/sort.c
@@ -99,6 +99,8 @@ struct sort_opts sort_opts_vals;
 bool debug_sort;
 bool need_hint;
 
+size_t mb_cur_max;
+
 #if defined(SORT_THREADS)
 unsigned int ncpu = 1;
 size_t nthreads = 1;
@@ -305,7 +307,7 @@ conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
 	if (wc && c) {
 		int res;
 
-		res = mbtowc(wc, c, MB_CUR_MAX);
+		res = mbtowc(wc, c, mb_cur_max);
 		if (res < 1)
 			*wc = def;
 	}
@@ -322,6 +324,8 @@ set_locale(void)
 
 	setlocale(LC_ALL, "");
 
+	mb_cur_max = MB_CUR_MAX;
+
 	lc = localeconv();
 
 	if (lc) {
diff --git a/usr.bin/sort/sort.h b/usr.bin/sort/sort.h
index b472fd16c098..96b36a65f4e4 100644
--- a/usr.bin/sort/sort.h
+++ b/usr.bin/sort/sort.h
@@ -127,4 +127,10 @@ extern struct sort_opts sort_opts_vals;
 
 extern struct sort_mods * const default_sort_mods;
 
+/*
+ * Cached value of MB_CUR_MAX.  MB_CUR_MAX is used often throughout the
+ * program, so caching it avoids frequent calls to __mb_cur_max.
+ */
+extern size_t mb_cur_max;
+
 #endif /* __BSD_SORT_H__ */


More information about the dev-commits-src-main mailing list