PERFORCE change 146557 for review
Konrad Jankowski
konrad at FreeBSD.org
Sun Aug 3 20:57:45 UTC 2008
http://perforce.freebsd.org/chv.cgi?CH=146557
Change 146557 by konrad at vspredator on 2008/08/03 20:57:32
Temporarily reverted changes, so that correct deltas can be seen.
Affected files ...
.. //depot/projects/soc2008/konrad_collation/libc/locale/collate.c#6 edit
.. //depot/projects/soc2008/konrad_collation/libc/locale/collate.h#5 edit
.. //depot/projects/soc2008/konrad_collation/libc/locale/collcmp.c#5 edit
.. //depot/projects/soc2008/konrad_collation/libc/locale/setlocale.c#5 edit
.. //depot/projects/soc2008/konrad_collation/libc/string/strcoll.c#5 edit
.. //depot/projects/soc2008/konrad_collation/libc/string/strxfrm.c#5 edit
.. //depot/projects/soc2008/konrad_collation/libc/string/wcscoll.c#5 edit
.. //depot/projects/soc2008/konrad_collation/libc/string/wcsxfrm.c#5 edit
Differences ...
==== //depot/projects/soc2008/konrad_collation/libc/locale/collate.c#6 (text+ko) ====
@@ -26,26 +26,16 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.33 2004/09/22 16:56:48 stefanf Exp $");
-
-#define __collate_chain_equiv_table (__collate_data->__chain_equiv_table)
-#define __collate_chain_pri_table (__collate_data->__chain_pri_table)
-#define __collate_char_pri_table (__collate_data->__char_pri_table)
-#define __collate_info (&__collate_data->__info)
-#define __collate_large_char_pri_table (__collate_data->__large_char_pri_table)
-#define __collate_substitute_table (__collate_data->__substitute_table)
+__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $");
#include "namespace.h"
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
-#include <stddef.h>
#include <string.h>
-#include <wchar.h>
#include <errno.h>
#include <unistd.h>
#include <sysexits.h>
-#include <ctype.h>
#include "un-namespace.h"
#include "collate.h"
@@ -54,25 +44,24 @@
#include "libc_private.h"
-#if _BYTE_ORDER == _LITTLE_ENDIAN
-static void wntohl(wchar_t *, int);
-#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
-void __collate_err(int ex, const char *f) __dead2;
-
-#undef __collate_load_error
int __collate_load_error = 1;
int __collate_substitute_nontrivial;
-struct __locale_st_collate *__collate_data = NULL;
+
+u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN];
+struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
+struct __collate_st_chain_pri *__collate_chain_pri_table;
+
+void __collate_err(int ex, const char *f) __dead2;
int
__collate_load_tables(const char *encoding)
{
FILE *fp;
- int i, saverr, chains, z;
+ int i, saverr, chains;
+ uint32_t u32;
char strbuf[STR_LEN], buf[PATH_MAX];
- struct __locale_st_collate *TMP;
- struct __collate_st_info info;
- void *vp;
+ void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
+ static char collate_encoding[ENCODING_LEN + 1];
/* 'encoding' must be already checked. */
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
@@ -83,7 +72,7 @@
/*
* If the locale name is the same as our cache, use the cache.
*/
- if (__collate_data && strcmp(encoding, __collate_data->__encoding) == 0) {
+ if (strcmp(encoding, collate_encoding) == 0) {
__collate_load_error = 0;
return (_LDP_CACHE);
}
@@ -98,9 +87,6 @@
(void)strcat(buf, "/");
(void)strcat(buf, encoding);
(void)strcat(buf, "/LC_COLLATE");
-#ifdef LOCALE_DEBUG
- fprintf(stderr, "__collate_load_tables: opening %s\n", buf);
-#endif
if ((fp = fopen(buf, "r")) == NULL)
return (_LDP_ERROR);
@@ -111,30 +97,23 @@
return (_LDP_ERROR);
}
chains = -1;
- if (strcmp(strbuf, COLLATE_VERSION1_3) == 0)
+ if (strcmp(strbuf, COLLATE_VERSION) == 0)
+ chains = 0;
+ else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
chains = 1;
if (chains < 0) {
- fprintf(stderr, "__collate_load_tables: wrong signature: %s\n", strbuf);
(void)fclose(fp);
errno = EFTYPE;
return (_LDP_ERROR);
}
if (chains) {
- if (fread(&info, sizeof(info), 1, fp) != 1) {
+ if (fread(&u32, sizeof(u32), 1, fp) != 1) {
saverr = errno;
(void)fclose(fp);
errno = saverr;
return (_LDP_ERROR);
}
-#if _BYTE_ORDER == _LITTLE_ENDIAN
- for(z = 0; z < info.directive_count; z++) {
- info.undef_pri[z] = ntohl(info.undef_pri[z]);
- info.subst_count[z] = ntohl(info.subst_count[z]);
- }
- info.chain_count = ntohl(info.chain_count);
- info.large_pri_count = ntohl(info.large_pri_count);
-#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
- if ((chains = info.chain_count) < 0) {
+ if ((chains = (int)ntohl(u32)) < 1) {
(void)fclose(fp);
errno = EFTYPE;
return (_LDP_ERROR);
@@ -142,13 +121,26 @@
} else
chains = TABLE_SIZE;
- i = sizeof(struct __locale_st_collate)
- + sizeof(struct __collate_st_chain_pri) * chains
- + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count;
- for(z = 0; z < info.directive_count; z++)
- i += sizeof(struct __collate_st_subst) * info.subst_count[z];
- if ((TMP = (struct __locale_st_collate *)malloc(i)) == NULL) {
+ if ((TMP_substitute_table =
+ malloc(sizeof(__collate_substitute_table))) == NULL) {
+ saverr = errno;
+ (void)fclose(fp);
+ errno = saverr;
+ return (_LDP_ERROR);
+ }
+ if ((TMP_char_pri_table =
+ malloc(sizeof(__collate_char_pri_table))) == NULL) {
+ saverr = errno;
+ free(TMP_substitute_table);
+ (void)fclose(fp);
+ errno = saverr;
+ return (_LDP_ERROR);
+ }
+ if ((TMP_chain_pri_table =
+ malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
saverr = errno;
+ free(TMP_substitute_table);
+ free(TMP_char_pri_table);
(void)fclose(fp);
errno = saverr;
return (_LDP_ERROR);
@@ -158,415 +150,114 @@
{ \
if (fread(a, b, c, d) != c) { \
saverr = errno; \
- free(TMP); \
+ free(TMP_substitute_table); \
+ free(TMP_char_pri_table); \
+ free(TMP_chain_pri_table); \
(void)fclose(d); \
errno = saverr; \
return (_LDP_ERROR); \
} \
}
- /* adjust size to read the remaining in one chunk */
- i -= offsetof(struct __locale_st_collate, __char_pri_table);
- FREAD(TMP->__char_pri_table, i, 1, fp);
+ FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
+ FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
+ FREAD(TMP_chain_pri_table,
+ sizeof(*__collate_chain_pri_table), chains, fp);
(void)fclose(fp);
- vp = (void *)(TMP + 1);
-
- /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */
- if (info.subst_count[0] > 0) {
- TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
- vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
- } else
- TMP->__substitute_table[0] = NULL;
- if (info.flags & COLLATE_SUBST_DUP)
- TMP->__substitute_table[1] = TMP->__substitute_table[0];
- else if (info.subst_count[1] > 0) {
- TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
- vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
- } else
- TMP->__substitute_table[1] = NULL;
-
- if (chains > 0) {
- TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
- vp += chains * sizeof(struct __collate_st_chain_pri);
- } else
- TMP->__chain_pri_table = NULL;
- if (info.large_pri_count > 0)
- TMP->__large_char_pri_table =
- (struct __collate_st_large_char_pri *)vp;
- else
- TMP->__large_char_pri_table = NULL;
-
-#if _BYTE_ORDER == _LITTLE_ENDIAN
- {
- struct __collate_st_char_pri *p = TMP->__char_pri_table;
- for(i = UCHAR_MAX + 1; i-- > 0; p++) {
- for(z = 0; z < info.directive_count; z++)
- p->pri[z] = ntohl(p->pri[z]);
- }
+ (void)strcpy(collate_encoding, encoding);
+ if (__collate_substitute_table_ptr != NULL)
+ free(__collate_substitute_table_ptr);
+ __collate_substitute_table_ptr = TMP_substitute_table;
+ if (__collate_char_pri_table_ptr != NULL)
+ free(__collate_char_pri_table_ptr);
+ __collate_char_pri_table_ptr = TMP_char_pri_table;
+ for (i = 0; i < UCHAR_MAX + 1; i++) {
+ __collate_char_pri_table[i].prim =
+ ntohl(__collate_char_pri_table[i].prim);
+ __collate_char_pri_table[i].sec =
+ ntohl(__collate_char_pri_table[i].sec);
}
- for(z = 0; z < info.directive_count; z++)
- if (info.subst_count[z] > 0) {
- struct __collate_st_subst *p =
- TMP->__substitute_table[z];
- for(i = info.subst_count[z]; i-- > 0; p++) {
- p->val = ntohl(p->val);
- wntohl(p->str, STR_LEN);
- }
- }
- {
- struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
- for(i = chains; i-- > 0; p++) {
- wntohl(p->str, STR_LEN);
- for(z = 0; z < info.directive_count; z++)
- p->pri[z] = ntohl(p->pri[z]);
- }
+ if (__collate_chain_pri_table != NULL)
+ free(__collate_chain_pri_table);
+ __collate_chain_pri_table = TMP_chain_pri_table;
+ for (i = 0; i < chains; i++) {
+ __collate_chain_pri_table[i].prim =
+ ntohl(__collate_chain_pri_table[i].prim);
+ __collate_chain_pri_table[i].sec =
+ ntohl(__collate_chain_pri_table[i].sec);
}
- if (info.large_pri_count > 0) {
- struct __collate_st_large_char_pri *p =
- TMP->__large_char_pri_table;
- for(i = info.large_pri_count; i-- > 0; p++) {
- p->val = ntohl(p->val);
- for(z = 0; z < info.directive_count; z++)
- p->pri.pri[z] = ntohl(p->pri.pri[z]);
+ __collate_substitute_nontrivial = 0;
+ for (i = 0; i < UCHAR_MAX + 1; i++) {
+ if (__collate_substitute_table[i][0] != i ||
+ __collate_substitute_table[i][1] != 0) {
+ __collate_substitute_nontrivial = 1;
+ break;
}
}
-#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
- (void)strcpy(TMP->__encoding, encoding);
- (void)memcpy(&TMP->__info, &info, sizeof(info));
- __collate_data = TMP;
-
- __collate_load_error = (info.subst_count[0] > 0 ||
- info.subst_count[1] > 0);
__collate_load_error = 0;
-#ifdef LOCALE_DEBUG
- fprintf(stderr, "__collate_load_tables: loaded successfully\n");
-#endif
return (_LDP_LOADED);
}
-static int
-__collate_wcsnlen(const wchar_t *s, int len)
+u_char *
+__collate_substitute(const u_char *s)
{
- int n = 0;
- while (*s && n < len) {
- s++;
- n++;
- }
- return n;
-}
-
-static struct __collate_st_subst *
-substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
-{
- int low = 0;
- int high = n - 1;
- int next, compar;
- struct __collate_st_subst *p;
-
- while (low <= high) {
- next = (low + high) / 2;
- p = tab + next;
- compar = key - p->val;
- if (compar == 0)
- return p;
- if (compar > 0)
- low = next + 1;
- else
- high = next - 1;
- }
- return NULL;
-}
-
-wchar_t *
-__collate_substitute(const wchar_t *s, int which)
-{
int dest_len, len, nlen;
- int n, delta, nsubst;
- wchar_t *dest_str = NULL;
- const wchar_t *fp;
- struct __collate_st_subst *subst, *match;
+ int delta = strlen(s);
+ u_char *dest_str = NULL;
if (s == NULL || *s == '\0')
- return (__collate_wcsdup(L""));
- dest_len = wcslen(s);
- nsubst = __collate_info->subst_count[which];
- if (nsubst <= 0)
- return __collate_wcsdup(s);
- subst = __collate_substitute_table[which];
- delta = dest_len / 4;
- if (delta < 2)
- delta = 2;
- dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t));
+ return (__collate_strdup(""));
+ delta += delta / 8;
+ dest_str = malloc(dest_len = delta);
if (dest_str == NULL)
__collate_err(EX_OSERR, __func__);
len = 0;
while (*s) {
- if ((match = substsearch(*s, subst, nsubst)) != NULL) {
- fp = match->str;
- n = __collate_wcsnlen(fp, STR_LEN);
- } else {
- fp = s;
- n = 1;
- }
- nlen = len + n;
+ nlen = len + strlen(__collate_substitute_table[*s]);
if (dest_len <= nlen) {
- dest_str = reallocf(dest_str, (dest_len = nlen + delta)
- * sizeof(wchar_t));
+ dest_str = reallocf(dest_str, dest_len = nlen + delta);
if (dest_str == NULL)
__collate_err(EX_OSERR, __func__);
}
- wcsncpy(dest_str + len, fp, n);
- len += n;
- s++;
+ (void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
+ len = nlen;
}
- dest_str[len] = 0;
return (dest_str);
}
-static struct __collate_st_chain_pri *
-chainsearch(const wchar_t *key, int *len)
-{
- int low = 0;
- int high = __collate_info->chain_count - 1;
- int next, compar, l;
- struct __collate_st_chain_pri *p;
- struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
-
- while (low <= high) {
- next = (low + high) / 2;
- p = tab + next;
- compar = *key - *p->str;
- if (compar == 0) {
- l = __collate_wcsnlen(p->str, STR_LEN);
- compar = wcsncmp(key, p->str, l);
- if (compar == 0) {
- *len = l;
- return p;
- }
- }
- if (compar > 0)
- low = next + 1;
- else
- high = next - 1;
- }
- return NULL;
-}
-
-static struct __collate_st_large_char_pri *
-largesearch(const wchar_t key)
-{
- int low = 0;
- int high = __collate_info->large_pri_count - 1;
- int next, compar;
- struct __collate_st_large_char_pri *p;
- struct __collate_st_large_char_pri *tab =
- __collate_large_char_pri_table;
-
- while (low <= high) {
- next = (low + high) / 2;
- p = tab + next;
- compar = key - p->val;
- if (compar == 0)
- return p;
- if (compar > 0)
- low = next + 1;
- else
- high = next - 1;
- }
- return NULL;
-}
-
void
-__collate_lookup(const wchar_t *t, int *len, int *prim, int *sec)
+__collate_lookup(const u_char *t, int *len, int *prim, int *sec)
{
struct __collate_st_chain_pri *p2;
- int l;
*len = 1;
*prim = *sec = 0;
- p2 = chainsearch(t, &l);
- /* use the chain if prim >= 0 */
- if (p2 && p2->pri[0] >= 0) {
- *len = l;
- *prim = p2->pri[0];
- *sec = p2->pri[1];
- return;
- }
- if (*t <= UCHAR_MAX) {
- *prim = __collate_char_pri_table[*t].pri[0];
- *sec = __collate_char_pri_table[*t].pri[1];
- return;
- }
- if (__collate_info->large_pri_count > 0) {
- struct __collate_st_large_char_pri *match;
- match = largesearch(*t);
- if (match) {
- *prim = match->pri.pri[0];
- *sec = match->pri.pri[1];
+ for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
+ if (*t == p2->str[0] &&
+ strncmp(t, p2->str, strlen(p2->str)) == 0) {
+ *len = strlen(p2->str);
+ *prim = p2->prim;
+ *sec = p2->sec;
return;
}
}
- *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
- *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
+ *prim = __collate_char_pri_table[*t].prim;
+ *sec = __collate_char_pri_table[*t].sec;
}
-void
-__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which)
+u_char *
+__collate_strdup(u_char *s)
{
- struct __collate_st_chain_pri *p2;
- int p, l;
+ u_char *t = strdup(s);
- *len = 1;
- *pri = 0;
- p2 = chainsearch(t, &l);
- if (p2) {
- p = p2->pri[which];
- /* use the chain if pri >= 0 */
- if (p >= 0) {
- *len = l;
- *pri = p;
- return;
- }
- }
- if (*t <= UCHAR_MAX) {
- *pri = __collate_char_pri_table[*t].pri[which];
- return;
- }
- if (__collate_info->large_pri_count > 0) {
- struct __collate_st_large_char_pri *match;
- match = largesearch(*t);
- if (match) {
- *pri = match->pri.pri[which];
- return;
- }
- }
- *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l;
-}
-
-wchar_t *
-__collate_mbstowcs(const char *s)
-{
- static const mbstate_t initial;
- mbstate_t st;
- size_t len;
- const char *ss;
- wchar_t *wcs;
-
- ss = s;
- st = initial;
- if ((len = mbsrtowcs(NULL, &ss, 0, &st)) == (size_t)-1)
- return NULL;
- if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL)
+ if (t == NULL)
__collate_err(EX_OSERR, __func__);
- st = initial;
- mbsrtowcs(wcs, &s, len, &st);
- wcs[len] = 0;
-
- return (wcs);
+ return (t);
}
-wchar_t *
-__collate_wcsdup(const wchar_t *s)
-{
- size_t len = wcslen(s) + 1;
- wchar_t *wcs;
-
- if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL)
- __collate_err(EX_OSERR, __func__);
- wcscpy(wcs, s);
- return (wcs);
-}
-
void
-__collate_xfrm(const wchar_t *src, wchar_t **xf)
-{
- int pri, len;
- size_t slen;
- const wchar_t *t;
- wchar_t *tt = NULL, *tr = NULL;
- int direc, pass;
- wchar_t *xfp;
- struct __collate_st_info *info = __collate_info;
- int sverrno;
-
- for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
- xf[pass] = NULL;
- for(pass = 0; pass < info->directive_count; pass++) {
- direc = info->directive[pass];
- if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
- sverrno = errno;
- free(tt);
- errno = sverrno;
- tt = __collate_substitute(src, pass);
- }
- if (direc & DIRECTIVE_BACKWARD) {
- wchar_t *bp, *fp, c;
- sverrno = errno;
- free(tr);
- errno = sverrno;
- tr = __collate_wcsdup(tt ? tt : src);
- bp = tr;
- fp = tr + wcslen(tr) - 1;
- while(bp < fp) {
- c = *bp;
- *bp++ = *fp;
- *fp-- = c;
- }
- t = (const wchar_t *)tr;
- } else if (tt)
- t = (const wchar_t *)tt;
- else
- t = (const wchar_t *)src;
- sverrno = errno;
- if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) *
- (wcslen(t) + 1))) == NULL) {
- errno = sverrno;
- slen = 0;
- goto end;
- }
- errno = sverrno;
- xfp = xf[pass];
- if (direc & DIRECTIVE_POSITION) {
- while(*t) {
- __collate_lookup_which(t, &len, &pri, pass);
- t += len;
- if (pri <= 0) {
- if (pri < 0) {
- errno = EINVAL;
- slen = 0;
- goto end;
- }
- pri = COLLATE_MAX_PRIORITY;
- }
- *xfp++ = pri;
- }
- } else {
- while(*t) {
- __collate_lookup_which(t, &len, &pri, pass);
- t += len;
- if (pri <= 0) {
- if (pri < 0) {
- errno = EINVAL;
- slen = 0;
- goto end;
- }
- continue;
- }
- *xfp++ = pri;
- }
- }
- *xfp = 0;
- }
- end:
- sverrno = errno;
- free(tt);
- free(tr);
- errno = sverrno;
-}
-
-void
__collate_err(int ex, const char *f)
{
const char *s;
@@ -584,350 +275,24 @@
exit(ex);
}
-/*
- * __collate_collating_symbol takes the multibyte string specified by
- * src and slen, and using ps, converts that to a wide character. Then
- * it is checked to verify it is a collating symbol, and then copies
- * it to the wide character string specified by dst and dlen (the
- * results are not null terminated). The length of the wide characters
- * copied to dst is returned if successful. Zero is returned if no such
- * collating symbol exists. (size_t)-1 is returned if there are wide-character
- * conversion errors, if the length of the converted string is greater that
- * STR_LEN or if dlen is too small. It is up to the calling routine to
- * preserve the mbstate_t structure as needed.
- */
-size_t
-__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src,
- size_t slen, mbstate_t *ps)
-{
- wchar_t wname[STR_LEN];
- wchar_t w, *wp;
- size_t len, l;
-
- /* POSIX locale */
- if (__collate_load_error) {
- if (dlen < 1)
- return (size_t)-1;
- if (slen != 1 || !isascii(*src))
- return 0;
- *dst = *src;
- return 1;
- }
- for(wp = wname, len = 0; slen > 0; len++) {
- l = mbrtowc(&w, src, slen, ps);
- if (l == (size_t)-1 || l == (size_t)-2)
- return (size_t)-1;
- if (l == 0)
- break;
- if (len >= STR_LEN)
- return -1;
- *wp++ = w;
- src += l;
- slen = (long)slen - (long)l;
- }
- if (len == 0 || len > dlen)
- return (size_t)-1;
- if (len == 1) {
- if (*wname <= UCHAR_MAX) {
- if (__collate_char_pri_table[*wname].pri[0] >= 0) {
- if (dlen > 0)
- *dst = *wname;
- return 1;
- }
- return 0;
- } else if (__collate_info->large_pri_count > 0) {
- struct __collate_st_large_char_pri *match;
- match = largesearch(*wname);
- if (match && match->pri.pri[0] >= 0) {
- if (dlen > 0)
- *dst = *wname;
- return 1;
- }
- }
- return 0;
- }
- *wp = 0;
- if (__collate_info->chain_count > 0) {
- struct __collate_st_chain_pri *match;
- int ll;
- match = chainsearch(wname, &ll);
- if (match) {
- if (ll < dlen)
- dlen = ll;
- wcsncpy(dst, wname, dlen);
- return ll;
- }
- }
- return 0;
-}
-
-/*
- * __collate_equiv_class returns the equivalence class number for the symbol
- * specified by src and slen, using ps to convert from multi-byte to wide
- * character. Zero is returned if the symbol is not in an equivalence
- * class. -1 is returned if there are wide character conversion error,
- * if there are any greater-than-8-bit characters or if a multi-byte symbol
- * is greater or equal to STR_LEN in length. It is up to the calling
- * routine to preserve the mbstate_t structure as needed.
- */
-int
-__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps)
-{
- wchar_t wname[STR_LEN];
- wchar_t w, *wp;
- size_t len, l;
- int e;
-
- /* POSIX locale */
- if (__collate_load_error)
- return 0;
- for(wp = wname, len = 0; slen > 0; len++) {
- l = mbrtowc(&w, src, slen, ps);
- if (l == (size_t)-1 || l == (size_t)-2)
- return -1;
- if (l == 0)
- break;
- if (len >= STR_LEN)
- return -1;
- *wp++ = w;
- src += l;
- slen = (long)slen - (long)l;
- }
- if (len == 0)
- return -1;
- if (len == 1) {
- e = -1;
- if (*wname <= UCHAR_MAX)
- e = __collate_char_pri_table[*wname].pri[0];
- else if (__collate_info->large_pri_count > 0) {
- struct __collate_st_large_char_pri *match;
- match = largesearch(*wname);
- if (match)
- e = match->pri.pri[0];
- }
- if (e == 0)
- return IGNORE_EQUIV_CLASS;
- return e > 0 ? e : 0;
- }
- *wp = 0;
- if (__collate_info->chain_count > 0) {
- struct __collate_st_chain_pri *match;
- int ll;
- match = chainsearch(wname, &ll);
- if (match) {
- e = match->pri[0];
- if (e == 0)
- return IGNORE_EQUIV_CLASS;
- return e < 0 ? -e : e;
- }
- }
- return 0;
-}
-
-/*
- * __collate_equiv_match tries to match any single or multi-character symbol
- * in equivalence class equiv_class in the multi-byte string specified by src
- * and slen. If start is non-zero, it is taken to be the first (pre-converted)
- * wide character. Subsequence wide characters, if needed, will use ps in
- * the conversion. On a successful match, the length of the matched string
- * is returned (including the start character). If dst is non-NULL, the
- * matched wide-character string is copied to dst, a wide character array of
- * length dlen (the results are not zero-terminated). If rlen is non-NULL,
- * the number of character in src actually used is returned. Zero is
- * returned by __collate_equiv_match if there is no match. (size_t)-1 is
- * returned on error: if there were conversion errors or if dlen is too small
- * to accept the results. On no match or error, ps is restored to its incoming
- * state.
- */
-size_t
-__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen)
-{
- wchar_t w;
- size_t len, l, clen;
- int i;
- wchar_t buf[STR_LEN], *wp;
- mbstate_t save;
- const char *s = src;
- size_t sl = slen;
- struct __collate_st_chain_pri *ch = NULL;
-
- /* POSIX locale */
- if (__collate_load_error)
- return (size_t)-1;
- if (equiv_class == IGNORE_EQUIV_CLASS)
- equiv_class = 0;
- if (ps)
- save = *ps;
- wp = buf;
- len = clen = 0;
- if (start) {
- *wp++ = start;
- len = 1;
- }
- /* convert up to the max chain length */
- while(sl > 0 && len < __collate_info->chain_max_len) {
- l = mbrtowc(&w, s, sl, ps);
- if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
- break;
- *wp++ = w;
- s += l;
- clen += l;
- sl -= l;
- len++;
- }
- *wp = 0;
- if (len > 1 && (ch = chainsearch(buf, &i)) != NULL) {
- int e = ch->pri[0];
- if (e < 0)
- e = -e;
- if (e == equiv_class)
- goto found;
- }
- /* try single character */
- i = 1;
- if (*buf <= UCHAR_MAX) {
- if (equiv_class == __collate_char_pri_table[*buf].pri[0])
- goto found;
- } else if (__collate_info->large_pri_count > 0) {
- struct __collate_st_large_char_pri *match;
- match = largesearch(*buf);
- if (match && equiv_class == match->pri.pri[0])
- goto found;
- }
- /* no match */
- if (ps)
- *ps = save;
- return 0;
-found:
- /* if we converted more than we used, restore to initial and reconvert
- * up to what did match */
- if (i < len) {
- len = i;
- if (ps)
- *ps = save;
- if (start)
- i--;
- clen = 0;
- while(i-- > 0) {
- l = mbrtowc(&w, src, slen, ps);
- src += l;
- clen += l;
- slen -= l;
- }
- }
- if (dst) {
- if (dlen < len) {
- if (ps)
- *ps = save;
- return (size_t)-1;
- }
- for(wp = buf; len > 0; len--)
- *dst++ = *wp++;
- }
- if (rlen)
- *rlen = clen;
- return len;
-}
-
-#if _BYTE_ORDER == _LITTLE_ENDIAN
-static void
-wntohl(wchar_t *str, int len)
-{
- for(; *str && len > 0; str++, len--)
- *str = ntohl(*str);
-}
-#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
-
#ifdef COLLATE_DEBUG
-static char *
-show(int c)
-{
- static char buf[5];
-
- if (c >=32 && c <= 126)
- sprintf(buf, "'%c' ", c);
- else
- sprintf(buf, "\\x{%02x}", c);
- return buf;
-}
-
-static char *
-showwcs(const wchar_t *t, int len)
-{
- static char buf[64];
- char *cp = buf;
-
- for(; *t && len > 0; len--, t++) {
- if (*t >=32 && *t <= 126)
- *cp++ = *t;
- else {
- sprintf(cp, "\\x{%02x}", *t);
- cp += strlen(cp);
- }
- }
- *cp = 0;
- return buf;
-}
-
void
__collate_print_tables()
{
- int i, z;
- locale_t loc = __current_locale();
+ int i;
+ struct __collate_st_chain_pri *p2;
- printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d "
- "sc=%d cc=%d lc=%d\n",
- __collate_info->directive[0], __collate_info->directive[1],
- __collate_info->flags, __collate_info->chain_max_len,
- __collate_info->directive_count,
- __collate_info->undef_pri[0], __collate_info->undef_pri[1],
- __collate_info->subst_count[0], __collate_info->subst_count[1],
- __collate_info->chain_count, __collate_info->large_pri_count);
- for(z = 0; z < __collate_info->directive_count; z++) {
- if (__collate_info->subst_count[z] > 0) {
- struct __collate_st_subst *p2 =
- __collate_substitute_table[z];
- if (z == 0 && (__collate_info->flags &
- COLLATE_SUBST_DUP))
- printf("Both substitute tables:\n");
- else
- printf("Substitute table %d:\n", z);
- for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
- printf("\t%s --> \"%s\"\n",
- show(p2->val),
- showwcs(p2->str, STR_LEN));
- }
- }
- if (__collate_info->chain_count > 0) {
- printf("Chain priority table:\n");
- struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
- for (i = __collate_info->chain_count; i-- > 0; p2++) {
- printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
- for(z = 0; z < __collate_info->directive_count; z++)
- printf(" %d", p2->pri[z]);
- putchar('\n');
- }
- }
+ printf("Substitute table:\n");
+ for (i = 0; i < UCHAR_MAX + 1; i++)
+ if (i != *__collate_substitute_table[i])
+ printf("\t'%c' --> \"%s\"\n", i,
+ __collate_substitute_table[i]);
+ printf("Chain priority table:\n");
+ for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
+ printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);
printf("Char priority table:\n");
- {
- struct __collate_st_char_pri *p2 = __collate_char_pri_table;
- for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
- printf("\t%s :", show(i));
- for(z = 0; z < __collate_info->directive_count; z++)
- printf(" %d", p2->pri[z]);
- putchar('\n');
- }
- }
- if (__collate_info->large_pri_count > 0) {
- struct __collate_st_large_char_pri *p2 =
- __collate_large_char_pri_table;
- printf("Large priority table:\n");
- for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
- printf("\t%s :", show(p2->val));
- for(z = 0; z < __collate_info->directive_count; z++)
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list