svn commit: r286535 - projects/collation/lib/libc/string

Baptiste Daroussin bapt at FreeBSD.org
Sun Aug 9 17:08:19 UTC 2015


Author: bapt
Date: Sun Aug  9 17:08:18 2015
New Revision: 286535
URL: https://svnweb.freebsd.org/changeset/base/286535

Log:
  Update wcscoll forgotten in previous patch

Modified:
  projects/collation/lib/libc/string/wcscoll.c

Modified: projects/collation/lib/libc/string/wcscoll.c
==============================================================================
--- projects/collation/lib/libc/string/wcscoll.c	Sun Aug  9 15:53:02 2015	(r286534)
+++ projects/collation/lib/libc/string/wcscoll.c	Sun Aug  9 17:08:18 2015	(r286535)
@@ -1,4 +1,5 @@
 /*-
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2002 Tim J. Robbins
  * All rights reserved.
  *
@@ -38,50 +39,162 @@ __FBSDID("$FreeBSD$");
 #include <wchar.h>
 #include "collate.h"
 
-static char *__mbsdup(const wchar_t *);
-
-/*
- * Placeholder implementation of wcscoll(). Attempts to use the single-byte
- * collation ordering where possible, and falls back on wcscmp() in locales
- * with extended character sets.
- */
 int
 wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
 {
-	char *mbs1, *mbs2;
-	int diff, sverrno;
+	int len1, len2, pri1, pri2, ret;
+	wchar_t *tr1 = NULL, *tr2 = NULL;
+	int direc, pass;
+
 	FIX_LOCALE(locale);
 	struct xlocale_collate *table =
 		(struct xlocale_collate*)locale->components[XLC_COLLATE];
 
-	if (table->__collate_load_error || MB_CUR_MAX > 1)
+	if (table->__collate_load_error)
 		/*
-		 * Locale has no special collating order, could not be
-		 * loaded, or has an extended character set; do a fast binary
-		 * comparison.
+		 * Locale has no special collating order or could not be
+		 * loaded, do a fast binary comparison.
 		 */
 		return (wcscmp(ws1, ws2));
 
-	if ((mbs1 = __mbsdup(ws1)) == NULL || (mbs2 = __mbsdup(ws2)) == NULL) {
-		/*
-		 * Out of memory or illegal wide chars; fall back to wcscmp()
-		 * but leave errno indicating the error. Callers that don't
-		 * check for error will get a reasonable but often slightly
-		 * incorrect result.
-		 */
-		sverrno = errno;
-		free(mbs1);
-		errno = sverrno;
-		return (wcscmp(ws1, ws2));
-	}
+	ret = 0;
 
-	diff = strcoll_l(mbs1, mbs2, locale);
-	sverrno = errno;
-	free(mbs1);
-	free(mbs2);
-	errno = sverrno;
+	/*
+	 * Once upon a time we had code to try to optimize this, but
+	 * it turns out that you really can't make many assumptions
+	 * safely.  You absolutely have to run this pass by pass,
+	 * because some passes will be ignored for a given character,
+	 * while others will not.  Simpler locales will benefit from
+	 * having fewer passes, and most comparisons should resolve
+	 * during the primary pass anyway.
+	 *
+	 * Note that we do one final extra pass at the end to pick
+	 * up UNDEFINED elements.  There is special handling for them.
+	 */
+	for (pass = 0; pass <= table->info->directive_count; pass++) {
+
+		const int32_t *st1 = NULL;
+		const int32_t *st2 = NULL;
+		const wchar_t	*w1 = ws1;
+		const wchar_t	*w2 = ws2;
+
+		/* special pass for UNDEFINED */
+		if (pass == table->info->directive_count) {
+			direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+		} else {
+			direc = table->info->directive[pass];
+		}
+
+		if (direc & DIRECTIVE_BACKWARD) {
+			wchar_t *bp, *fp, c;
+			if ((tr1 = wcsdup(w1)) == NULL)
+				goto fail;
+			bp = tr1;
+			fp = tr1 + wcslen(tr1) - 1;
+			while (bp < fp) {
+				c = *bp;
+				*bp++ = *fp;
+				*fp-- = c;
+			}
+			if ((tr2 = wcsdup(w2)) == NULL)
+				goto fail;
+			bp = tr2;
+			fp = tr2 + wcslen(tr2) - 1;
+			while (bp < fp) {
+				c = *bp;
+				*bp++ = *fp;
+				*fp-- = c;
+			}
+			w1 = tr1;
+			w2 = tr2;
+		}
+
+		if (direc & DIRECTIVE_POSITION) {
+			while ((*w1 || st1) && (*w2 || st2)) {
+				pri1 = pri2 = 0;
+				_collate_lookup(table, w1, &len1, &pri1, pass,
+				    &st1);
+				if (pri1 <= 0) {
+					if (pri1 < 0) {
+						errno = EINVAL;
+						goto fail;
+					}
+					pri1 = COLLATE_MAX_PRIORITY;
+				}
+				_collate_lookup(table, w2, &len2, &pri2, pass,
+				    &st2);
+				if (pri2 <= 0) {
+					if (pri2 < 0) {
+						errno = EINVAL;
+						goto fail;
+					}
+					pri2 = COLLATE_MAX_PRIORITY;
+				}
+				if (pri1 != pri2) {
+					ret = pri1 - pri2;
+					goto end;
+				}
+				w1 += len1;
+				w2 += len2;
+			}
+		} else {
+			while ((*w1 || st1) && (*w2 || st2)) {
+				pri1 = pri2 = 0;
+				while (*w1) {
+					_collate_lookup(table, w1, &len1,
+					    &pri1, pass, &st1);
+					if (pri1 > 0)
+						break;
+					if (pri1 < 0) {
+						errno = EINVAL;
+						goto fail;
+					}
+					w1 += len1;
+				}
+				while (*w2) {
+					_collate_lookup(table, w2, &len2,
+					    &pri2, pass, &st2);
+					if (pri2 > 0)
+						break;
+					if (pri2 < 0) {
+						errno = EINVAL;
+						goto fail;
+					}
+					w2 += len2;
+				}
+				if (!pri1 || !pri2)
+					break;
+				if (pri1 != pri2) {
+					ret = pri1 - pri2;
+					goto end;
+				}
+				w1 += len1;
+				w2 += len2;
+			}
+		}
+		if (!*w1) {
+			if (*w2) {
+				ret = -(int)*w2;
+				goto end;
+			}
+		} else {
+			ret = *w1;
+			goto end;
+		}
+	}
+	ret = 0;
 
-	return (diff);
+end:
+	if (tr1)
+		free(tr1);
+	if (tr2)
+		free(tr2);
+
+	return (ret);
+
+fail:
+	ret = wcscmp(ws1, ws2);
+	goto end;
 }
 
 int
@@ -89,24 +202,3 @@ wcscoll(const wchar_t *ws1, const wchar_
 {
 	return wcscoll_l(ws1, ws2, __get_locale());
 }
-
-static char *
-__mbsdup(const wchar_t *ws)
-{
-	static const mbstate_t initial;
-	mbstate_t st;
-	const wchar_t *wcp;
-	size_t len;
-	char *mbs;
-
-	wcp = ws;
-	st = initial;
-	if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1)
-		return (NULL);
-	if ((mbs = malloc(len + 1)) == NULL)
-		return (NULL);
-	st = initial;
-	wcsrtombs(mbs, &ws, len + 1, &st);
-
-	return (mbs);
-}


More information about the svn-src-projects mailing list