svn commit: r230196 - in stable/9: lib/libkiconv sys/conf
sys/fs/msdosfs sys/fs/smbfs sys/kern sys/libkern
sys/modules/libiconv sys/modules/libmchain sys/netsmb sys/sys
Kevin Lo
kevlo at FreeBSD.org
Mon Jan 16 05:15:13 UTC 2012
Author: kevlo
Date: Mon Jan 16 05:15:13 2012
New Revision: 230196
URL: http://svn.freebsd.org/changeset/base/230196
Log:
MFC r228796:
Discarding local array based on return values.
MFC r227650:
Add unicode support to msdosfs and smbfs; original patches from imura,
bug fixes by Kuan-Chung Chiu <buganini at gmail dot com>.
Added:
stable/9/sys/libkern/iconv_ucs.c (contents, props changed)
Modified:
stable/9/lib/libkiconv/xlat16_iconv.c
stable/9/sys/conf/files
stable/9/sys/fs/msdosfs/msdosfs_conv.c
stable/9/sys/fs/smbfs/smbfs_smb.c
stable/9/sys/fs/smbfs/smbfs_subr.c
stable/9/sys/kern/subr_mchain.c
stable/9/sys/libkern/iconv.c
stable/9/sys/modules/libiconv/Makefile
stable/9/sys/modules/libmchain/Makefile
stable/9/sys/netsmb/smb_conn.c
stable/9/sys/netsmb/smb_conn.h
stable/9/sys/netsmb/smb_smb.c
stable/9/sys/netsmb/smb_subr.c
stable/9/sys/sys/iconv.h
stable/9/sys/sys/mchain.h
Modified: stable/9/lib/libkiconv/xlat16_iconv.c
==============================================================================
--- stable/9/lib/libkiconv/xlat16_iconv.c Mon Jan 16 05:07:32 2012 (r230195)
+++ stable/9/lib/libkiconv/xlat16_iconv.c Mon Jan 16 05:15:13 2012 (r230196)
@@ -74,6 +74,18 @@ kiconv_add_xlat16_cspair(const char *toc
struct xlat16_table xt;
void *data;
char *p;
+ const char unicode[] = ENCODING_UNICODE;
+
+ if ((flag & KICONV_WCTYPE) == 0 &&
+ strcmp(unicode, tocode) != 0 &&
+ strcmp(unicode, fromcode) != 0 &&
+ kiconv_lookupconv(unicode) == 0) {
+ error = kiconv_add_xlat16_cspair(unicode, fromcode, flag);
+ if (error)
+ return (-1);
+ error = kiconv_add_xlat16_cspair(tocode, unicode, flag);
+ return (error);
+ }
if (kiconv_lookupcs(tocode, fromcode) == 0)
return (0);
Modified: stable/9/sys/conf/files
==============================================================================
--- stable/9/sys/conf/files Mon Jan 16 05:07:32 2012 (r230195)
+++ stable/9/sys/conf/files Mon Jan 16 05:15:13 2012 (r230196)
@@ -2540,6 +2540,7 @@ libkern/fnmatch.c standard
libkern/gets.c standard
libkern/iconv.c optional libiconv
libkern/iconv_converter_if.m optional libiconv
+libkern/iconv_ucs.c optional libiconv
libkern/iconv_xlat.c optional libiconv
libkern/iconv_xlat16.c optional libiconv
libkern/index.c standard
Modified: stable/9/sys/fs/msdosfs/msdosfs_conv.c
==============================================================================
--- stable/9/sys/fs/msdosfs/msdosfs_conv.c Mon Jan 16 05:07:32 2012 (r230195)
+++ stable/9/sys/fs/msdosfs/msdosfs_conv.c Mon Jan 16 05:15:13 2012 (r230196)
@@ -61,9 +61,9 @@
extern struct iconv_functions *msdosfs_iconv;
static int mbsadjpos(const char **, size_t, size_t, int, int, void *handle);
-static u_int16_t dos2unixchr(const u_char **, size_t *, int, struct msdosfsmount *);
+static u_char * dos2unixchr(u_char *, const u_char **, size_t *, int, struct msdosfsmount *);
static u_int16_t unix2doschr(const u_char **, size_t *, struct msdosfsmount *);
-static u_int16_t win2unixchr(u_int16_t, struct msdosfsmount *);
+static u_char * win2unixchr(u_char *, u_int16_t, struct msdosfsmount *);
static u_int16_t unix2winchr(const u_char **, size_t *, int, struct msdosfsmount *);
/*
@@ -242,7 +242,7 @@ dos2unixfn(dn, un, lower, pmp)
{
size_t i;
int thislong = 0;
- u_int16_t c;
+ u_char *c, tmpbuf[5];
/*
* If first char of the filename is SLOT_E5 (0x05), then the real
@@ -257,14 +257,12 @@ dos2unixfn(dn, un, lower, pmp)
* Copy the name portion into the unix filename string.
*/
for (i = 8; i > 0 && *dn != ' ';) {
- c = dos2unixchr((const u_char **)&dn, &i, lower & LCASE_BASE,
- pmp);
- if (c & 0xff00) {
- *un++ = c >> 8;
+ c = dos2unixchr(tmpbuf, (const u_char **)&dn, &i,
+ lower & LCASE_BASE, pmp);
+ while (*c != '\0') {
+ *un++ = *c++;
thislong++;
}
- *un++ = c;
- thislong++;
}
dn += i;
@@ -276,14 +274,12 @@ dos2unixfn(dn, un, lower, pmp)
*un++ = '.';
thislong++;
for (i = 3; i > 0 && *dn != ' ';) {
- c = dos2unixchr((const u_char **)&dn, &i,
+ c = dos2unixchr(tmpbuf, (const u_char **)&dn, &i,
lower & LCASE_EXT, pmp);
- if (c & 0xff00) {
- *un++ = c >> 8;
+ while (*c != '\0') {
+ *un++ = *c++;
thislong++;
}
- *un++ = c;
- thislong++;
}
}
*un++ = 0;
@@ -652,8 +648,9 @@ win2unixfn(nbp, wep, chksum, pmp)
int chksum;
struct msdosfsmount *pmp;
{
+ u_char *c, tmpbuf[5];
u_int8_t *cp;
- u_int8_t *np, name[WIN_CHARS * 2 + 1];
+ u_int8_t *np, name[WIN_CHARS * 3 + 1];
u_int16_t code;
int i;
@@ -686,10 +683,9 @@ win2unixfn(nbp, wep, chksum, pmp)
*np = '\0';
return -1;
default:
- code = win2unixchr(code, pmp);
- if (code & 0xff00)
- *np++ = code >> 8;
- *np++ = code;
+ c = win2unixchr(tmpbuf, code, pmp);
+ while (*c != '\0')
+ *np++ = *c++;
break;
}
cp += 2;
@@ -705,10 +701,9 @@ win2unixfn(nbp, wep, chksum, pmp)
*np = '\0';
return -1;
default:
- code = win2unixchr(code, pmp);
- if (code & 0xff00)
- *np++ = code >> 8;
- *np++ = code;
+ c = win2unixchr(tmpbuf, code, pmp);
+ while (*c != '\0')
+ *np++ = *c++;
break;
}
cp += 2;
@@ -724,10 +719,9 @@ win2unixfn(nbp, wep, chksum, pmp)
*np = '\0';
return -1;
default:
- code = win2unixchr(code, pmp);
- if (code & 0xff00)
- *np++ = code >> 8;
- *np++ = code;
+ c = win2unixchr(tmpbuf, code, pmp);
+ while (*c != '\0')
+ *np++ = *c++;
break;
}
cp += 2;
@@ -817,24 +811,22 @@ mbsadjpos(const char **instr, size_t inl
/*
* Convert DOS char to Local char
*/
-static u_int16_t
-dos2unixchr(const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp)
+static u_char *
+dos2unixchr(u_char *outbuf, const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp)
{
- u_char c;
- char *outp, outbuf[3];
- u_int16_t wc;
+ u_char c, *outp;
size_t len, olen;
+ outp = outbuf;
if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
- olen = len = 2;
- outp = outbuf;
+ olen = len = 4;
if (lower & (LCASE_BASE | LCASE_EXT))
msdosfs_iconv->convchr_case(pmp->pm_d2u, (const char **)instr,
- ilen, &outp, &olen, KICONV_LOWER);
+ ilen, (char **)&outp, &olen, KICONV_LOWER);
else
msdosfs_iconv->convchr(pmp->pm_d2u, (const char **)instr,
- ilen, &outp, &olen);
+ ilen, (char **)&outp, &olen);
len -= olen;
/*
@@ -843,21 +835,21 @@ dos2unixchr(const u_char **instr, size_t
if (len == 0) {
(*ilen)--;
(*instr)++;
- return ('?');
+ *outp++ = '?';
}
-
- wc = 0;
- while(len--)
- wc |= (*(outp - len - 1) & 0xff) << (len << 3);
- return (wc);
+ } else {
+ (*ilen)--;
+ c = *(*instr)++;
+ c = dos2unix[c];
+ if (lower & (LCASE_BASE | LCASE_EXT))
+ c = u2l[c];
+ *outp++ = c;
+ outbuf[1] = '\0';
}
- (*ilen)--;
- c = *(*instr)++;
- c = dos2unix[c];
- if (lower & (LCASE_BASE | LCASE_EXT))
- c = u2l[c];
- return ((u_int16_t)c);
+ *outp = '\0';
+ outp = outbuf;
+ return (outp);
}
/*
@@ -940,23 +932,21 @@ unix2doschr(const u_char **instr, size_t
/*
* Convert Windows char to Local char
*/
-static u_int16_t
-win2unixchr(u_int16_t wc, struct msdosfsmount *pmp)
+static u_char *
+win2unixchr(u_char *outbuf, u_int16_t wc, struct msdosfsmount *pmp)
{
- u_char *inp, *outp, inbuf[3], outbuf[3];
+ u_char *inp, *outp, inbuf[3];
size_t ilen, olen, len;
- if (wc == 0)
- return (0);
-
+ outp = outbuf;
if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
inbuf[0] = (u_char)(wc>>8);
inbuf[1] = (u_char)wc;
inbuf[2] = '\0';
- ilen = olen = len = 2;
+ ilen = 2;
+ olen = len = 4;
inp = inbuf;
- outp = outbuf;
msdosfs_iconv->convchr(pmp->pm_w2u, (const char **)&inp, &ilen,
(char **)&outp, &olen);
len -= olen;
@@ -964,21 +954,15 @@ win2unixchr(u_int16_t wc, struct msdosfs
/*
* return '?' if failed to convert
*/
- if (len == 0) {
- wc = '?';
- return (wc);
- }
-
- wc = 0;
- while(len--)
- wc |= (*(outp - len - 1) & 0xff) << (len << 3);
- return (wc);
+ if (len == 0)
+ *outp++ = '?';
+ } else {
+ *outp++ = (wc & 0xff00) ? '?' : (u_char)(wc & 0xff);
}
- if (wc & 0xff00)
- wc = '?';
-
- return (wc);
+ *outp = '\0';
+ outp = outbuf;
+ return (outp);
}
/*
Modified: stable/9/sys/fs/smbfs/smbfs_smb.c
==============================================================================
--- stable/9/sys/fs/smbfs/smbfs_smb.c Mon Jan 16 05:07:32 2012 (r230195)
+++ stable/9/sys/fs/smbfs/smbfs_smb.c Mon Jan 16 05:15:13 2012 (r230196)
@@ -34,6 +34,7 @@
#include <sys/vnode.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
+#include <sys/endian.h>
#ifdef USE_MD5_HASH
#include <sys/md5.h>
@@ -393,6 +394,10 @@ smbfs_smb_setpattr(struct smbnode *np, u
if (error)
break;
mb_put_uint8(mbp, SMB_DT_ASCII);
+ if (SMB_UNICODE_STRINGS(SSTOVC(ssp))) {
+ mb_put_padbyte(mbp);
+ mb_put_uint8(mbp, 0); /* 1st byte of NULL Unicode char */
+ }
mb_put_uint8(mbp, 0);
smb_rq_bend(rqp);
error = smb_rq_simple(rqp);
@@ -909,6 +914,10 @@ smbfs_smb_search(struct smbfs_fctx *ctx)
mb_put_uint16le(mbp, 0); /* context length */
ctx->f_flags &= ~SMBFS_RDD_FINDFIRST;
} else {
+ if (SMB_UNICODE_STRINGS(vcp)) {
+ mb_put_padbyte(mbp);
+ mb_put_uint8(mbp, 0);
+ }
mb_put_uint8(mbp, 0); /* file name length */
mb_put_uint8(mbp, SMB_DT_VARIABLE);
mb_put_uint16le(mbp, SMB_SKEYLEN);
@@ -1069,7 +1078,7 @@ smbfs_smb_trans2find2(struct smbfs_fctx
mb_put_uint32le(mbp, 0); /* resume key */
mb_put_uint16le(mbp, flags);
if (ctx->f_rname)
- mb_put_mem(mbp, ctx->f_rname, strlen(ctx->f_rname) + 1, MB_MSYSTEM);
+ mb_put_mem(mbp, ctx->f_rname, ctx->f_rnamelen + 1, MB_MSYSTEM);
else
mb_put_uint8(mbp, 0); /* resume file name */
#if 0
@@ -1152,7 +1161,10 @@ static int
smbfs_findopenLM2(struct smbfs_fctx *ctx, struct smbnode *dnp,
const char *wildcard, int wclen, int attr, struct smb_cred *scred)
{
- ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK);
+ if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
+ ctx->f_name = malloc(SMB_MAXFNAMELEN * 2, M_SMBFSDATA, M_WAITOK);
+ } else
+ ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK);
if (ctx->f_name == NULL)
return ENOMEM;
ctx->f_infolevel = SMB_DIALECT(SSTOVC(ctx->f_ssp)) < SMB_DIALECT_NTLM0_12 ?
@@ -1231,7 +1243,10 @@ smbfs_findnextLM2(struct smbfs_fctx *ctx
SMBERROR("unexpected info level %d\n", ctx->f_infolevel);
return EINVAL;
}
- nmlen = min(size, SMB_MAXFNAMELEN);
+ if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
+ nmlen = min(size, SMB_MAXFNAMELEN * 2);
+ } else
+ nmlen = min(size, SMB_MAXFNAMELEN);
cp = ctx->f_name;
error = md_get_mem(mbp, cp, nmlen, MB_MSYSTEM);
if (error)
@@ -1245,8 +1260,12 @@ smbfs_findnextLM2(struct smbfs_fctx *ctx
return EBADRPC;
}
}
- if (nmlen && cp[nmlen - 1] == 0)
- nmlen--;
+ if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
+ if (nmlen > 1 && cp[nmlen - 1] == 0 && cp[nmlen - 2] == 0)
+ nmlen -= 2;
+ } else
+ if (nmlen && cp[nmlen - 1] == 0)
+ nmlen--;
if (nmlen == 0)
return EBADRPC;
@@ -1330,10 +1349,17 @@ smbfs_findnext(struct smbfs_fctx *ctx, i
error = smbfs_findnextLM2(ctx, limit);
if (error)
return error;
- if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') ||
- (ctx->f_nmlen == 2 && ctx->f_name[0] == '.' &&
- ctx->f_name[1] == '.'))
- continue;
+ if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
+ if ((ctx->f_nmlen == 2 &&
+ *(u_int16_t *)ctx->f_name == htole16(0x002e)) ||
+ (ctx->f_nmlen == 4 &&
+ *(u_int32_t *)ctx->f_name == htole32(0x002e002e)))
+ continue;
+ } else
+ if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') ||
+ (ctx->f_nmlen == 2 && ctx->f_name[0] == '.' &&
+ ctx->f_name[1] == '.'))
+ continue;
break;
}
smbfs_fname_tolocal(SSTOVC(ctx->f_ssp), ctx->f_name, &ctx->f_nmlen,
Modified: stable/9/sys/fs/smbfs/smbfs_subr.c
==============================================================================
--- stable/9/sys/fs/smbfs/smbfs_subr.c Mon Jan 16 05:07:32 2012 (r230195)
+++ stable/9/sys/fs/smbfs/smbfs_subr.c Mon Jan 16 05:15:13 2012 (r230196)
@@ -130,7 +130,10 @@ smb_fphelp(struct mbchain *mbp, struct s
return smb_put_dmem(mbp, vcp, "\\", 2, caseopt);*/
while (i--) {
np = *--npp;
- error = mb_put_uint8(mbp, '\\');
+ if (SMB_UNICODE_STRINGS(vcp))
+ error = mb_put_uint16le(mbp, '\\');
+ else
+ error = mb_put_uint8(mbp, '\\');
if (error)
break;
error = smb_put_dmem(mbp, vcp, np->n_name, np->n_nmlen, caseopt);
@@ -148,6 +151,11 @@ smbfs_fullpath(struct mbchain *mbp, stru
int caseopt = SMB_CS_NONE;
int error;
+ if (SMB_UNICODE_STRINGS(vcp)) {
+ error = mb_put_padbyte(mbp);
+ if (error)
+ return error;
+ }
if (SMB_DIALECT(vcp) < SMB_DIALECT_LANMAN1_0)
caseopt |= SMB_CS_UPPER;
if (dnp != NULL) {
@@ -156,7 +164,10 @@ smbfs_fullpath(struct mbchain *mbp, stru
return error;
}
if (name) {
- error = mb_put_uint8(mbp, '\\');
+ if (SMB_UNICODE_STRINGS(vcp))
+ error = mb_put_uint16le(mbp, '\\');
+ else
+ error = mb_put_uint8(mbp, '\\');
if (error)
return error;
error = smb_put_dmem(mbp, vcp, name, nmlen, caseopt);
@@ -164,6 +175,8 @@ smbfs_fullpath(struct mbchain *mbp, stru
return error;
}
error = mb_put_uint8(mbp, 0);
+ if (SMB_UNICODE_STRINGS(vcp) && error == 0)
+ error = mb_put_uint8(mbp, 0);
return error;
}
@@ -191,6 +204,17 @@ smbfs_fname_tolocal(struct smb_vc *vcp,
error = iconv_conv_case
(vcp->vc_tolocal, (const char **)&ibuf, &ilen, &obuf, &olen, copt);
+ if (error && SMB_UNICODE_STRINGS(vcp)) {
+ /*
+ * If using unicode, leaving a file name as it was when
+ * convert fails will cause a problem because the file name
+ * will contain NULL.
+ * Here, put '?' and give converted file name.
+ */
+ *obuf = '?';
+ olen--;
+ error = 0;
+ }
if (!error) {
*nmlen = sizeof(outbuf) - olen;
memcpy(name, outbuf, *nmlen);
Modified: stable/9/sys/kern/subr_mchain.c
==============================================================================
--- stable/9/sys/kern/subr_mchain.c Mon Jan 16 05:07:32 2012 (r230195)
+++ stable/9/sys/kern/subr_mchain.c Mon Jan 16 05:15:13 2012 (r230196)
@@ -128,6 +128,21 @@ mb_reserve(struct mbchain *mbp, int size
}
int
+mb_put_padbyte(struct mbchain *mbp) /* append one zero byte iff the current write address is odd; returns 0 or mb_put_mem()'s result */
+{
+ caddr_t dst;
+ char x = 0; /* the padding byte */
+
+ dst = mtod(mbp->mb_cur, caddr_t) + mbp->mb_cur->m_len; /* address just past the data already in the current mbuf */
+
+ /* only add padding if address is odd, so that what is appended next starts on an even address */
+ if ((unsigned long)dst & 1)
+ return mb_put_mem(mbp, (caddr_t)&x, 1, MB_MSYSTEM);
+ else
+ return 0;
+}
+
+int
mb_put_uint8(struct mbchain *mbp, uint8_t x)
{
return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM);
Modified: stable/9/sys/libkern/iconv.c
==============================================================================
--- stable/9/sys/libkern/iconv.c Mon Jan 16 05:07:32 2012 (r230195)
+++ stable/9/sys/libkern/iconv.c Mon Jan 16 05:15:13 2012 (r230196)
@@ -377,6 +377,18 @@ iconv_sysctl_cslist(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern_iconv, OID_AUTO, cslist, CTLFLAG_RD | CTLTYPE_OPAQUE,
NULL, 0, iconv_sysctl_cslist, "S,xlat", "registered charset pairs");
+int /* EINVAL when the converter class is not found, otherwise whatever iconv_register_cspair() returns */
+iconv_add(const char *converter, const char *to, const char *from) /* register the charset pair from -> to under the named converter class */
+{
+ struct iconv_converter_class *dcp; /* filled in by iconv_lookupconv() */
+ struct iconv_cspair *csp; /* receives the registered pair; not used further in this function */
+
+ if (iconv_lookupconv(converter, &dcp) != 0)
+ return EINVAL;
+
+ return iconv_register_cspair(to, from, dcp, NULL, &csp);
+}
+
/*
* Add new charset pair
*/
Added: stable/9/sys/libkern/iconv_ucs.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ stable/9/sys/libkern/iconv_ucs.c Mon Jan 16 05:15:13 2012 (r230196)
@@ -0,0 +1,540 @@
+/*-
+ * Copyright (c) 2003, 2005 Ryuichiro Imura
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/iconv.h>
+
+#include "iconv_converter_if.h"
+
+/*
+ * "UCS" converter
+ */
+
+#define KICONV_UCS_COMBINE 0x1
+#define KICONV_UCS_FROM_UTF8 0x2
+#define KICONV_UCS_TO_UTF8 0x4
+#define KICONV_UCS_FROM_LE 0x8
+#define KICONV_UCS_TO_LE 0x10
+#define KICONV_UCS_FROM_UTF16 0x20
+#define KICONV_UCS_TO_UTF16 0x40
+#define KICONV_UCS_UCS4 0x80
+
+#define ENCODING_UTF16 "UTF-16BE"
+#define ENCODING_UTF8 "UTF-8"
+
+static struct {
+ const char *name; /* charset name, compared with strcmp() in iconv_ucs_open() */
+ int from_flag, to_flag; /* KICONV_UCS_* bits ORed into convtype when name is the source / the destination charset */
+} unicode_family[] = {
+ { "UTF-8", KICONV_UCS_FROM_UTF8, KICONV_UCS_TO_UTF8 },
+ { "UCS-2LE", KICONV_UCS_FROM_LE, KICONV_UCS_TO_LE },
+ { "UTF-16BE", KICONV_UCS_FROM_UTF16, KICONV_UCS_TO_UTF16 },
+ { "UTF-16LE", KICONV_UCS_FROM_UTF16|KICONV_UCS_FROM_LE,
+ KICONV_UCS_TO_UTF16|KICONV_UCS_TO_LE },
+ { NULL, 0, 0 } /* sentinel: the lookup loop stops at name == NULL */
+};
+
+static uint32_t utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen);
+static u_char *ucs4_to_utf8(uint32_t ucs4, char * dst, size_t *utf8width, size_t dstlen);
+static uint32_t encode_surrogate(uint32_t code);
+static uint32_t decode_surrogate(const u_char *ucs);
+
+#ifdef MODULE_DEPEND
+MODULE_DEPEND(iconv_ucs, libiconv, 2, 2, 2);
+#endif
+
+/*
+ * UCS converter instance
+ */
+struct iconv_ucs {
+ KOBJ_FIELDS;
+ int convtype; /* bitmask of KICONV_UCS_* flags computed in iconv_ucs_open() */
+ struct iconv_cspair * d_csp; /* the csp argument given to iconv_ucs_open() */
+ struct iconv_cspair * d_cspf; /* the cspf argument; assigned only when cspf is non-NULL and the source is UTF-8 or little-endian */
+ void * f_ctp; /* set to NULL, then optionally opened with iconv_open(ENCODING_UNICODE, from) */
+ void * t_ctp; /* set to NULL, then optionally opened with iconv_open(to, ENCODING_UNICODE) */
+ void * ctype; /* set to NULL, then optionally opened with iconv_open(KICONV_WCTYPE_NAME, ENCODING_UTF8); passed to towlower()/towupper() */
+};
+
+static int /* converter "open" method: builds one iconv_ucs instance and stores it in *dpp; the only return value is 0 */
+iconv_ucs_open(struct iconv_converter_class *dcp,
+ struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp)
+{
+ struct iconv_ucs *dp;
+ int i;
+ const char *from, *to;
+
+ dp = (struct iconv_ucs *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK);
+ to = csp->cp_to;
+ from = cspf ? cspf->cp_from : csp->cp_from; /* when cspf is given, the source charset is taken from it */
+
+ dp->convtype = 0;
+
+ if (cspf)
+ dp->convtype |= KICONV_UCS_COMBINE; /* two pairs chained: from -> ENCODING_UNICODE -> to */
+ for (i = 0; unicode_family[i].name; i++) { /* pick up the flags for a Unicode-family source and/or destination */
+ if (strcmp(from, unicode_family[i].name) == 0)
+ dp->convtype |= unicode_family[i].from_flag;
+ if (strcmp(to, unicode_family[i].name) == 0)
+ dp->convtype |= unicode_family[i].to_flag;
+ }
+ if (strcmp(ENCODING_UNICODE, ENCODING_UTF16) == 0) /* compares two fixed names: surrogate handling is on only when ENCODING_UNICODE is "UTF-16BE" */
+ dp->convtype |= KICONV_UCS_UCS4;
+ else
+ dp->convtype &= ~KICONV_UCS_UCS4;
+
+ dp->f_ctp = dp->t_ctp = NULL;
+ if (dp->convtype & KICONV_UCS_COMBINE) {
+ if ((dp->convtype & KICONV_UCS_FROM_UTF8) == 0 &&
+ (dp->convtype & KICONV_UCS_FROM_LE) == 0) {
+ iconv_open(ENCODING_UNICODE, from, &dp->f_ctp); /* NOTE(review): return value ignored - confirm f_ctp stays NULL on failure */
+ }
+ if ((dp->convtype & KICONV_UCS_TO_UTF8) == 0 &&
+ (dp->convtype & KICONV_UCS_TO_LE) == 0) {
+ iconv_open(to, ENCODING_UNICODE, &dp->t_ctp); /* NOTE(review): return value ignored here as well */
+ }
+ }
+
+ dp->ctype = NULL;
+ if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_TO_UTF8))
+ iconv_open(KICONV_WCTYPE_NAME, ENCODING_UTF8, &dp->ctype); /* handle later passed to towlower()/towupper(); result unchecked */
+
+ dp->d_csp = csp;
+ if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE)) { /* source side handled by this converter: take a reference */
+ if (cspf) {
+ dp->d_cspf = cspf; /* NOTE(review): d_cspf is assigned only here, yet iconv_ucs_close() tests it on every path - presumably kobj_create() zero-fills; confirm */
+ cspf->cp_refcount++;
+ } else
+ csp->cp_refcount++;
+ }
+ if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE)) /* destination side handled by this converter: take a reference */
+ csp->cp_refcount++;
+ *dpp = (void*)dp;
+ return 0;
+}
+
+static int /* converter "close" method: undoes iconv_ucs_open(); always returns 0 */
+iconv_ucs_close(void *data)
+{
+ struct iconv_ucs *dp = data;
+
+ if (dp->f_ctp) /* close whichever helper handles were opened */
+ iconv_close(dp->f_ctp);
+ if (dp->t_ctp)
+ iconv_close(dp->t_ctp);
+ if (dp->ctype)
+ iconv_close(dp->ctype);
+ if (dp->d_cspf) /* source-side reference was taken on cspf */
+ dp->d_cspf->cp_refcount--;
+ else if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE)) /* source-side reference was taken on csp */
+ dp->d_csp->cp_refcount--;
+ if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE)) /* destination-side reference on csp */
+ dp->d_csp->cp_refcount--;
+ kobj_delete((struct kobj*)data, M_ICONV);
+ return 0;
+}
+
+static int /* returns 0, or -1 / the helper converter's error once a character cannot be converted */
+iconv_ucs_conv(void *d2p, const char **inbuf,
+ size_t *inbytesleft, char **outbuf, size_t *outbytesleft,
+ int convchar, int casetype) /* convchar == 1: stop after one character */
+{
+ struct iconv_ucs *dp = (struct iconv_ucs*)d2p;
+ int ret = 0, i;
+ size_t in, on, ir, or, inlen, outlen, ucslen; /* in/on: initial byte counts; ir/or: bytes still left */
+ const char *src, *p;
+ char *dst;
+ u_char ucs[4], *q; /* ucs[]: the current character in the intermediate encoding, most significant byte first */
+ uint32_t code;
+
+ if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL)
+ return 0; /* no buffers: nothing to do */
+ ir = in = *inbytesleft;
+ or = on = *outbytesleft;
+ src = *inbuf;
+ dst = *outbuf;
+
+ while (ir > 0 && or > 0) {
+
+ /*
+ * The first half of conversion.
+ * (convert any code into ENCODING_UNICODE)
+ */
+ code = 0;
+ p = src;
+ if (dp->convtype & KICONV_UCS_FROM_UTF8) {
+ /* convert UTF-8 to ENCODING_UNICODE */
+ inlen = 0;
+ code = utf8_to_ucs4(p, &inlen, ir);
+ if (code == 0) { /* utf8_to_ucs4() yields 0 for malformed/truncated input, and also for a NUL byte */
+ ret = -1;
+ break;
+ }
+
+ if (casetype == KICONV_FROM_LOWER && dp->ctype) {
+ code = towlower(code, dp->ctype);
+ } else if (casetype == KICONV_FROM_UPPER && dp->ctype) {
+ code = towupper(code, dp->ctype);
+ }
+
+ if ((code >= 0xd800 && code < 0xe000) || code >= 0x110000 ) {
+ /* reserved for utf-16 surrogate pair */
+ /* invalid unicode */
+ ret = -1;
+ break;
+ }
+
+ if (inlen == 4) { /* 4-byte UTF-8 sequence: stored as a surrogate pair, or rejected without KICONV_UCS_UCS4 */
+ if (dp->convtype & KICONV_UCS_UCS4) {
+ ucslen = 4;
+ code = encode_surrogate(code);
+ } else {
+ /* can't handle with ucs-2 */
+ ret = -1;
+ break;
+ }
+ } else {
+ ucslen = 2;
+ }
+
+ /* save UCS-4 into ucs[] */
+ for (q = ucs, i = ucslen - 1 ; i >= 0 ; i--) /* most significant byte first */
+ *q++ = (code >> (i << 3)) & 0xff;
+
+ } else if (dp->convtype & KICONV_UCS_COMBINE && dp->f_ctp) {
+ /* convert local code to ENCODING_UNICODE */
+ ucslen = 4;
+ inlen = ir;
+ q = ucs;
+ ret = iconv_convchr_case(dp->f_ctp, &p, &inlen, (char **)&q,
+ &ucslen, casetype & (KICONV_FROM_LOWER | KICONV_FROM_UPPER));
+ if (ret)
+ break;
+ inlen = ir - inlen; /* turn "bytes left" into "bytes consumed" */
+ ucslen = 4 - ucslen; /* turn "space left" into "bytes produced" */
+
+ } else {
+ /* src code is a proper subset of ENCODING_UNICODE */
+ q = ucs;
+ if (dp->convtype & KICONV_UCS_FROM_LE) { /* NOTE(review): both branches read two input bytes before the ir < inlen test below, so ir == 1 reads one byte past the input - confirm callers always pass an even length */
+ *q = *(p + 1);
+ *(q + 1) = *p;
+ p += 2;
+ } else {
+ *q = *p++;
+ *(q + 1) = *p++;
+ }
+ if ((*q & 0xfc) == 0xd8) { /* first unit is 0xD800-0xDBFF: a second unit must follow */
+ if (dp->convtype & KICONV_UCS_UCS4 &&
+ dp->convtype & KICONV_UCS_FROM_UTF16) {
+ inlen = ucslen = 4;
+ } else {
+ /* invalid unicode */
+ ret = -1;
+ break;
+ }
+ } else {
+ inlen = ucslen = 2;
+ }
+ if (ir < inlen) { /* input ends in the middle of the character */
+ ret = -1;
+ break;
+ }
+ if (ucslen == 4) {
+ q += 2;
+ if (dp->convtype & KICONV_UCS_FROM_LE) {
+ *q = *(p + 1);
+ *(q + 1) = *p;
+ } else {
+ *q = *p++;
+ *(q + 1) = *p;
+ }
+ if ((*q & 0xfc) != 0xdc) { /* second unit must be 0xDC00-0xDFFF */
+ /* invalid unicode */
+ ret = -1;
+ break;
+ }
+ }
+ }
+
+ /*
+ * The second half of conversion.
+ * (convert ENCODING_UNICODE into any code)
+ */
+ p = ucs;
+ if (dp->convtype & KICONV_UCS_TO_UTF8) {
+ q = (u_char *)dst;
+ if (ucslen == 4 && dp->convtype & KICONV_UCS_UCS4) {
+ /* decode surrogate pair */
+ code = decode_surrogate(p);
+ } else {
+ code = (ucs[0] << 8) | ucs[1];
+ }
+
+ if (casetype == KICONV_LOWER && dp->ctype) {
+ code = towlower(code, dp->ctype);
+ } else if (casetype == KICONV_UPPER && dp->ctype) {
+ code = towupper(code, dp->ctype);
+ }
+
+ outlen = 0;
+ if (ucs4_to_utf8(code, q, &outlen, or) == NULL) {
+ ret = -1;
+ break;
+ }
+
+ src += inlen; /* commit this character on both sides */
+ ir -= inlen;
+ dst += outlen;
+ or -= outlen;
+
+ } else if (dp->convtype & KICONV_UCS_COMBINE && dp->t_ctp) {
+ ret = iconv_convchr_case(dp->t_ctp, &p, &ucslen, &dst,
+ &or, casetype & (KICONV_LOWER | KICONV_UPPER)); /* the helper advances dst and decrements or itself */
+ if (ret)
+ break;
+
+ src += inlen;
+ ir -= inlen;
+
+ } else {
+ /* dst code is a proper subset of ENCODING_UNICODE */
+ if (or < ucslen) { /* not enough output space for this character */
+ ret = -1;
+ break;
+ }
+ src += inlen;
+ ir -= inlen;
+ or -= ucslen;
+ if (dp->convtype & KICONV_UCS_TO_LE) {
+ *dst++ = *(p + 1);
+ *dst++ = *p;
+ p += 2;
+ } else {
+ *dst++ = *p++;
+ *dst++ = *p++;
+ }
+ if (ucslen == 4) {
+ if ((dp->convtype & KICONV_UCS_UCS4) == 0 ||
+ (dp->convtype & KICONV_UCS_TO_UTF16) == 0) { /* note: ir/or were already adjusted and two bytes written before this failure */
+ ret = -1;
+ break;
+ }
+ if (dp->convtype & KICONV_UCS_TO_LE) {
+ *dst++ = *(p + 1);
+ *dst++ = *p;
+ } else {
+ *dst++ = *p++;
+ *dst++ = *p;
+ }
+ }
+ }
+
+ if (convchar == 1) /* single-character mode: one iteration only */
+ break;
+ }
+
+ *inbuf += in - ir; /* report consumption/production back through the caller's pointers and counters */
+ *outbuf += on - or;
+ *inbytesleft -= in - ir;
+ *outbytesleft -= on - or;
+ return (ret);
+}
+
+static int /* converter "init" method: returns 0, or the first error from iconv_add() */
+iconv_ucs_init(struct iconv_converter_class *dcp)
+{
+ int error;
+
+ error = iconv_add(ENCODING_UNICODE, ENCODING_UNICODE, ENCODING_UTF8); /* pair: from UTF-8 to ENCODING_UNICODE */
+ if (error)
+ return (error);
+ error = iconv_add(ENCODING_UNICODE, ENCODING_UTF8, ENCODING_UNICODE); /* pair: from ENCODING_UNICODE to UTF-8 */
+ if (error)
+ return (error);
+ return (0);
+}
+
+static int /* converter "done" method: does nothing and reports success */
+iconv_ucs_done(struct iconv_converter_class *dcp)
+{
+ return (0);
+}
+
+static const char * /* converter "name" method: the class is named ENCODING_UNICODE, the same name iconv_ucs_init() passes to iconv_add() */
+iconv_ucs_name(struct iconv_converter_class *dcp)
+{
+ return (ENCODING_UNICODE);
+}
+
+static kobj_method_t iconv_ucs_methods[] = { /* binds each iconv_converter_* interface method to its implementation in this file */
+ KOBJMETHOD(iconv_converter_open, iconv_ucs_open),
+ KOBJMETHOD(iconv_converter_close, iconv_ucs_close),
+ KOBJMETHOD(iconv_converter_conv, iconv_ucs_conv),
+ KOBJMETHOD(iconv_converter_init, iconv_ucs_init),
+ KOBJMETHOD(iconv_converter_done, iconv_ucs_done),
+ KOBJMETHOD(iconv_converter_name, iconv_ucs_name),
+ {0, 0} /* table terminator */
+};
+
+KICONV_CONVERTER(ucs, sizeof(struct iconv_ucs)); /* declares the "ucs" converter with struct iconv_ucs as the instance size; presumably picks up iconv_ucs_methods by name - confirm in sys/iconv.h */
+
+static uint32_t /* returns the decoded value, or 0 when the sequence is malformed or truncated */
+utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen) /* *utf8width receives the sequence length (1-4) only when decoding completes */
+{
+ size_t i, w = 0;
+ uint32_t ucs4 = 0;
+
+ /*
+ * get leading 1 byte from utf-8
+ */
+ if ((*src & 0x80) == 0) { /* note: a NUL byte decodes to 0 here, the same value used to report failure */
+ /*
+ * leading 1 bit is "0"
+ * utf-8: 0xxxxxxx
+ * ucs-4: 00000000 00000000 00000000 0xxxxxxx
+ */
+ w = 1;
+ /* get trailing 7 bits */
+ ucs4 = *src & 0x7f;
+ } else if ((*src & 0xe0) == 0xc0) {
+ /*
+ * leading 3 bits are "110"
+ * utf-8: 110xxxxx 10yyyyyy
+ * ucs-4: 00000000 00000000 00000xxx xxyyyyyy
+ */
+ w = 2;
+ /* get trailing 5 bits */
+ ucs4 = *src & 0x1f;
+ } else if ((*src & 0xf0) == 0xe0) {
+ /*
+ * leading 4 bits are "1110"
+ * utf-8: 1110xxxx 10yyyyyy 10zzzzzz
+ * ucs-4: 00000000 00000000 xxxxyyyy yyzzzzzz
+ */
+ w = 3;
+ /* get trailing 4 bits */
+ ucs4 = *src & 0x0f;
+ } else if ((*src & 0xf8) == 0xf0) {
+ /*
+ * leading 5 bits are "11110"
+ * utf-8: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
+ * ucs-4: 00000000 000wwwxx xxxxyyyy yyzzzzzz
+ */
+ w = 4;
+ /* get trailing 3 bits */
+ ucs4 = *src & 0x07;
+ } else {
+ /* out of utf-16 range or having illegal bits */
+ return (0);
+ }
+ if (w == 0) /* cannot be true here: every branch above either sets w or returns */
+ return (0);
+
+ if (srclen < w) /* the sequence would run past the end of the input */
+ return (0);
+
+ /*
+ * get left parts from utf-8
+ */
+ for (i = 1 ; i < w ; i++) {
+ if ((*(src + i) & 0xc0) != 0x80) {
+ /* invalid: leading 2 bits are not "10" */
+ return (0);
+ }
+ /* concatenate trailing 6 bits into ucs4 */
+ ucs4 <<= 6;
+ ucs4 |= *(src + i) & 0x3f; /* NOTE(review): nothing here rejects overlong encodings (a small value spelled with more bytes than needed) - confirm whether callers need that */
+ }
+
+ *utf8width = w;
+ return (ucs4);
+}
+
+static u_char *
+ucs4_to_utf8(uint32_t ucs4, char *dst, size_t *utf8width, size_t dstlen)
+{
+ u_char lead, *p;
+ size_t i, w;
+
+ /*
+ * determine utf-8 width and leading bits
+ */
+ if (ucs4 < 0x80) {
+ w = 1;
+ lead = 0; /* "0" */
+ } else if (ucs4 < 0x800) {
+ w = 2;
+ lead = 0xc0; /* "11" */
+ } else if (ucs4 < 0x10000) {
+ w = 3;
+ lead = 0xe0; /* "111" */
+ } else if (ucs4 < 0x200000) {
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable-9
mailing list