svn commit: r249560 - in stable/9: contrib/libc-vis lib/libc/gen
Brooks Davis
brooks at FreeBSD.org
Tue Apr 16 19:27:10 UTC 2013
Author: brooks
Date: Tue Apr 16 19:27:09 2013
New Revision: 249560
URL: http://svnweb.freebsd.org/changeset/base/249560
Log:
MFC r248302:
Update to the latest (un)vis(3) sources from NetBSD. This adds
multibyte support[0] and the new functions strenvisx and strsenvisx.
Add MLINKS for vis(3) functions added by this and the initial import from
NetBSD[1].
PR: bin/166364, bin/175418
Submitted by: "J.R. Oldroyd" <fbsd at opal.com>[0]
stefanf[1]
Obtained from: NetBSD
Modified:
stable/9/contrib/libc-vis/unvis.3
stable/9/contrib/libc-vis/unvis.c
stable/9/contrib/libc-vis/vis.3
stable/9/contrib/libc-vis/vis.c
stable/9/contrib/libc-vis/vis.h
stable/9/lib/libc/gen/Makefile.inc
stable/9/lib/libc/gen/Symbol.map
Directory Properties:
stable/9/contrib/libc-vis/ (props changed)
stable/9/lib/libc/ (props changed)
Modified: stable/9/contrib/libc-vis/unvis.3
==============================================================================
--- stable/9/contrib/libc-vis/unvis.3 Tue Apr 16 19:25:41 2013 (r249559)
+++ stable/9/contrib/libc-vis/unvis.3 Tue Apr 16 19:27:09 2013 (r249560)
@@ -1,4 +1,4 @@
-.\" $NetBSD: unvis.3,v 1.23 2011/03/17 14:06:29 wiz Exp $
+.\" $NetBSD: unvis.3,v 1.27 2012/12/15 07:34:36 wiz Exp $
.\" $FreeBSD$
.\"
.\" Copyright (c) 1989, 1991, 1993
@@ -126,15 +126,17 @@ The
function has several return codes that must be handled properly.
They are:
.Bl -tag -width UNVIS_VALIDPUSH
-.It Li \&0 (zero)
+.It Li \&0 No (zero)
Another character is necessary; nothing has been recognized yet.
.It Dv UNVIS_VALID
A valid character has been recognized and is available at the location
-pointed to by cp.
+pointed to by
+.Fa cp .
.It Dv UNVIS_VALIDPUSH
A valid character has been recognized and is available at the location
-pointed to by cp; however, the character currently passed in should
-be passed in again.
+pointed to by
+.Fa cp ;
+however, the character currently passed in should be passed in again.
.It Dv UNVIS_NOCHAR
A valid sequence was detected, but no character was produced.
This return code is necessary to indicate a logical break between characters.
@@ -150,7 +152,7 @@ one more time with flag set to
to extract any remaining character (the character passed in is ignored).
.Pp
The
-.Ar flag
+.Fa flag
argument is also used to specify the encoding style of the source.
If set to
.Dv VIS_HTTPSTYLE
@@ -161,7 +163,8 @@ will decode URI strings as specified in
If set to
.Dv VIS_HTTP1866 ,
.Fn unvis
-will decode URI strings as specified in RFC 1866.
+will decode entity references and numeric character references
+as specified in RFC 1866.
If set to
.Dv VIS_MIMESTYLE ,
.Fn unvis
@@ -169,7 +172,9 @@ will decode MIME Quoted-Printable string
If set to
.Dv VIS_NOESCAPE ,
.Fn unvis
-will not decode \e quoted characters.
+will not decode
+.Ql \e
+quoted characters.
.Pp
The following code fragment illustrates a proper use of
.Fn unvis .
@@ -204,7 +209,7 @@ The functions
and
.Fn strnunvisx
will return \-1 on error and set
-.Va errno
+.Va errno
to:
.Bl -tag -width Er
.It Bq Er EINVAL
@@ -212,7 +217,7 @@ An invalid escape sequence was detected,
.El
.Pp
In addition the functions
-.Fn strnunvis
+.Fn strnunvis
and
.Fn strnunvisx
will can also set
@@ -244,4 +249,14 @@ and
functions appeared in
.Nx 6.0
and
-.Fx 10.0 .
+.Fx 9.2 .
+.Sh BUGS
+The names
+.Dv VIS_HTTP1808
+and
+.Dv VIS_HTTP1866
+are wrong.
+Percent-encoding was defined in RFC 1738, the original RFC for URL.
+RFC 1866 defines HTML 2.0, an application of SGML, from which it
+inherits concepts of numeric character references and entity
+references.
Modified: stable/9/contrib/libc-vis/unvis.c
==============================================================================
--- stable/9/contrib/libc-vis/unvis.c Tue Apr 16 19:25:41 2013 (r249559)
+++ stable/9/contrib/libc-vis/unvis.c Tue Apr 16 19:27:09 2013 (r249560)
@@ -1,4 +1,4 @@
-/* $NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $ */
+/* $NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $ */
/*-
* Copyright (c) 1989, 1993
@@ -34,7 +34,7 @@
#if 0
static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93";
#else
-__RCSID("$NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $");
+__RCSID("$NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
__FBSDID("$FreeBSD$");
@@ -90,7 +90,7 @@ __weak_alias(strnunvisx,_strnunvisx)
* RFC 1866
*/
static const struct nv {
- const char name[7];
+ char name[7];
uint8_t value;
} nv[] = {
{ "AElig", 198 }, /* capital AE diphthong (ligature) */
Modified: stable/9/contrib/libc-vis/vis.3
==============================================================================
--- stable/9/contrib/libc-vis/vis.3 Tue Apr 16 19:25:41 2013 (r249559)
+++ stable/9/contrib/libc-vis/vis.3 Tue Apr 16 19:27:09 2013 (r249560)
@@ -1,4 +1,4 @@
-.\" $NetBSD: vis.3,v 1.29 2012/12/14 22:55:59 christos Exp $
+.\" $NetBSD: vis.3,v 1.39 2013/02/20 20:05:26 christos Exp $
.\" $FreeBSD$
.\"
.\" Copyright (c) 1989, 1991, 1993
@@ -30,7 +30,7 @@
.\"
.\" @(#)vis.3 8.1 (Berkeley) 6/9/93
.\"
-.Dd December 14, 2012
+.Dd February 19, 2013
.Dt VIS 3
.Os
.Sh NAME
@@ -40,12 +40,14 @@
.Nm strnvis ,
.Nm strvisx ,
.Nm strnvisx ,
+.Nm strenvisx ,
.Nm svis ,
.Nm snvis ,
.Nm strsvis ,
.Nm strsnvis ,
-.Nm strsvisx
-.Nm strsnvisx
+.Nm strsvisx ,
+.Nm strsnvisx ,
+.Nm strsenvisx
.Nd visually encode characters
.Sh LIBRARY
.Lb libc
@@ -63,6 +65,8 @@
.Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
.Ft int
.Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag"
+.Ft int
+.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr"
.Ft char *
.Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
.Ft char *
@@ -75,6 +79,8 @@
.Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra"
.Ft int
.Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra"
+.Ft int
+.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr"
.Sh DESCRIPTION
The
.Fn vis
@@ -89,11 +95,11 @@ needs no encoding, it is copied in unalt
The string is null terminated, and a pointer to the end of the string is
returned.
The maximum length of any encoding is four
-characters (not including the trailing
+bytes (not including the trailing
.Dv NUL ) ;
thus, when
encoding a set of characters into a buffer, the size of the buffer should
-be four times the number of characters encoded, plus one for the trailing
+be four times the number of bytes encoded, plus one for the trailing
.Dv NUL .
The flag parameter is used for altering the default range of
characters considered for encoding and for altering the visual
@@ -142,16 +148,17 @@ terminate
The size of
.Fa dst
must be four times the number
-of characters encoded from
+of bytes encoded from
.Fa src
(plus one for the
.Dv NUL ) .
Both
-forms return the number of characters in dst (not including
-the trailing
+forms return the number of characters in
+.Fa dst
+(not including the trailing
.Dv NUL ) .
The
-.Dq n
+.Dq Nm n
versions of the functions also take an additional argument
.Fa dlen
that indicates the length of the
@@ -159,7 +166,7 @@ that indicates the length of the
buffer.
If
.Fa dlen
-is not large enough to fix the converted string then the
+is not large enough to fit the converted string then the
.Fn strnvis
and
.Fn strnvisx
@@ -167,6 +174,14 @@ functions return \-1 and set
.Va errno
to
.Dv ENOSPC .
+The
+.Fn strenvisx
+function takes an additional argument,
+.Fa cerr_ptr ,
+that is used to pass in and out a multibyte conversion error flag.
+This is useful when processing single characters at a time when
+it is possible that the locale may be set to something other
+than the locale of the characters in the input data.
.Pp
The functions
.Fn svis ,
@@ -174,16 +189,18 @@ The functions
.Fn strsvis ,
.Fn strsnvis ,
.Fn strsvisx ,
+.Fn strsnvisx ,
and
-.Fn strsnvisx
+.Fn strsenvisx
correspond to
.Fn vis ,
.Fn nvis ,
.Fn strvis ,
.Fn strnvis ,
.Fn strvisx ,
+.Fn strnvisx ,
and
-.Fn strnvisx
+.Fn strenvisx
but have an additional argument
.Fa extra ,
pointing to a
@@ -214,14 +231,13 @@ and
.Fn strnvisx ) ,
and the type of representation used.
By default, all non-graphic characters,
-except space, tab, and newline are encoded.
-(See
-.Xr isgraph 3 . )
+except space, tab, and newline are encoded (see
+.Xr isgraph 3 ) .
The following flags
alter this:
.Bl -tag -width VIS_WHITEX
.It Dv VIS_GLOB
-Also encode magic characters
+Also encode the magic characters
.Ql ( * ,
.Ql \&? ,
.Ql \&[
@@ -243,11 +259,13 @@ Synonym for
\&|
.Dv VIS_NL .
.It Dv VIS_SAFE
-Only encode "unsafe" characters.
+Only encode
+.Dq unsafe
+characters.
Unsafe means control characters which may cause common terminals to perform
unexpected functions.
Currently this form allows space, tab, newline, backspace, bell, and
-return - in addition to all graphic characters - unencoded.
+return \(em in addition to all graphic characters \(em unencoded.
.El
.Pp
(The above flags have no effect for
@@ -287,8 +305,8 @@ Use an
to represent meta characters (characters with the 8th
bit set), and use caret
.Ql ^
-to represent control characters see
-.Pf ( Xr iscntrl 3 ) .
+to represent control characters (see
+.Xr iscntrl 3 ) .
The following formats are used:
.Bl -tag -width xxxxx
.It Dv \e^C
@@ -335,19 +353,20 @@ Use C-style backslash sequences to repre
characters.
The following sequences are used to represent the indicated characters:
.Bd -unfilled -offset indent
-.Li \ea Tn - BEL No (007)
-.Li \eb Tn - BS No (010)
-.Li \ef Tn - NP No (014)
-.Li \en Tn - NL No (012)
-.Li \er Tn - CR No (015)
-.Li \es Tn - SP No (040)
-.Li \et Tn - HT No (011)
-.Li \ev Tn - VT No (013)
-.Li \e0 Tn - NUL No (000)
+.Li \ea Tn \(em BEL No (007)
+.Li \eb Tn \(em BS No (010)
+.Li \ef Tn \(em NP No (014)
+.Li \en Tn \(em NL No (012)
+.Li \er Tn \(em CR No (015)
+.Li \es Tn \(em SP No (040)
+.Li \et Tn \(em HT No (011)
+.Li \ev Tn \(em VT No (013)
+.Li \e0 Tn \(em NUL No (000)
.Ed
.Pp
-When using this format, the nextc parameter is looked at to determine
-if a
+When using this format, the
+.Fa nextc
+parameter is looked at to determine if a
.Dv NUL
character can be encoded as
.Ql \e0
@@ -374,8 +393,8 @@ represents a lower case hexadecimal digi
.It Dv VIS_MIMESTYLE
Use MIME Quoted-Printable encoding as described in RFC 2045, only don't
break lines and don't handle CRLF.
-The form is:
-.Ql %XX
+The form is
+.Ql =XX
where
.Em X
represents an upper case hexadecimal digit.
@@ -392,6 +411,41 @@ meta characters as
.Ql M-C ) .
With this flag set, the encoding is
ambiguous and non-invertible.
+.Sh MULTIBYTE CHARACTER SUPPORT
+These functions support multibyte character input.
+The encoding conversion is influenced by the setting of the
+.Ev LC_CTYPE
+environment variable which defines the set of characters
+that can be copied without encoding.
+.Pp
+When 8-bit data is present in the input,
+.Ev LC_CTYPE
+must be set to the correct locale or to the C locale.
+If the locales of the data and the conversion are mismatched,
+multibyte character recognition may fail and encoding will be performed
+byte-by-byte instead.
+.Pp
+As noted above,
+.Fa dst
+must be four times the number of bytes processed from
+.Fa src .
+But note that each multibyte character can be up to
+.Dv MB_LEN_MAX
+bytes
+.\" (see
+.\" .Xr multibyte 3 )
+so in terms of multibyte characters,
+.Fa dst
+must be four times
+.Dv MB_LEN_MAX
+times the number of characters processed from
+.Fa src .
+.Sh ENVIRONMENT
+.Bl -tag -width ".Ev LC_CTYPE"
+.It Ev LC_CTYPE
+Specify the locale of the input data.
+Set to C if the input data locale is unknown.
+.El
.Sh ERRORS
The functions
.Fn nvis
@@ -407,11 +461,11 @@ and
.Fn strsnvisx ,
will return \-1 when the
.Fa dlen
-destination buffer length size is not enough to perform the conversion while
+destination buffer size is not enough to perform the conversion while
setting
.Va errno
to:
-.Bl -tag -width Er
+.Bl -tag -width ".Bq Er ENOSPC"
.It Bq Er ENOSPC
The destination buffer size is not large enough to perform the conversion.
.El
@@ -419,18 +473,23 @@ The destination buffer size is not large
.Xr unvis 1 ,
.Xr vis 1 ,
.Xr glob 3 ,
+.\" .Xr multibyte 3 ,
.Xr unvis 3
.Rs
.%A T. Berners-Lee
.%T Uniform Resource Locators (URL)
-.%O RFC1738
+.%O "RFC 1738"
+.Re
+.Rs
+.%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies"
+.%O "RFC 2045"
.Re
.Sh HISTORY
The
.Fn vis ,
.Fn strvis ,
and
-.Fa strvisx
+.Fn strvisx
functions first appeared in
.Bx 4.4 .
The
@@ -441,7 +500,7 @@ and
functions appeared in
.Nx 1.5
and
-.Fx 10.0 .
+.Fx 9.2 .
The buffer size limited versions of the functions
.Po Fn nvis ,
.Fn strnvis ,
@@ -451,6 +510,9 @@ The buffer size limited versions of the
and
.Fn strsnvisx Pc
appeared in
-.Nx 6.0
and
-.Fx 10.0 .
+.Fx 9.2 .
+Multibyte character support was added in
+.Nx 7.0
+and
+.Fx 9.2 .
Modified: stable/9/contrib/libc-vis/vis.c
==============================================================================
--- stable/9/contrib/libc-vis/vis.c Tue Apr 16 19:25:41 2013 (r249559)
+++ stable/9/contrib/libc-vis/vis.c Tue Apr 16 19:27:09 2013 (r249560)
@@ -1,4 +1,4 @@
-/* $NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $ */
+/* $NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $ */
/*-
* Copyright (c) 1989, 1993
@@ -57,19 +57,23 @@
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $");
+__RCSID("$NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $");
#endif /* LIBC_SCCS and not lint */
+#ifdef __FBSDID
__FBSDID("$FreeBSD$");
+#define _DIAGASSERT(x) assert(x)
+#endif
#include "namespace.h"
#include <sys/types.h>
+#include <sys/param.h>
#include <assert.h>
#include <vis.h>
#include <errno.h>
#include <stdlib.h>
-
-#define _DIAGASSERT(x) assert(x)
+#include <wchar.h>
+#include <wctype.h>
#ifdef __weak_alias
__weak_alias(strvisx,_strvisx)
@@ -81,65 +85,66 @@ __weak_alias(strvisx,_strvisx)
#include <stdio.h>
#include <string.h>
-static char *do_svis(char *, size_t *, int, int, int, const char *);
+/*
+ * The reason for going through the trouble to deal with character encodings
+ * in vis(3), is that we use this to safe encode output of commands. This
+ * safe encoding varies depending on the character set. For example if we
+ * display ps output in French, we don't want to display French characters
+ * as M-foo.
+ */
+
+static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
#undef BELL
-#define BELL '\a'
+#define BELL L'\a'
+
+#define iswoctal(c) (((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
+#define iswwhite(c) (c == L' ' || c == L'\t' || c == L'\n')
+#define iswsafe(c) (c == L'\b' || c == BELL || c == L'\r')
+#define xtoa(c) L"0123456789abcdef"[c]
+#define XTOA(c) L"0123456789ABCDEF"[c]
-#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
-#define iswhite(c) (c == ' ' || c == '\t' || c == '\n')
-#define issafe(c) (c == '\b' || c == BELL || c == '\r')
-#define xtoa(c) "0123456789abcdef"[c]
-#define XTOA(c) "0123456789ABCDEF"[c]
-
-#define MAXEXTRAS 9
-
-#define MAKEEXTRALIST(flag, extra, orig_str) \
-do { \
- const char *orig = orig_str; \
- const char *o = orig; \
- char *e; \
- while (*o++) \
- continue; \
- extra = malloc((size_t)((o - orig) + MAXEXTRAS)); \
- if (!extra) break; \
- for (o = orig, e = extra; (*e++ = *o++) != '\0';) \
- continue; \
- e--; \
- if (flag & VIS_GLOB) { \
- *e++ = '*'; \
- *e++ = '?'; \
- *e++ = '['; \
- *e++ = '#'; \
- } \
- if (flag & VIS_SP) *e++ = ' '; \
- if (flag & VIS_TAB) *e++ = '\t'; \
- if (flag & VIS_NL) *e++ = '\n'; \
- if ((flag & VIS_NOSLASH) == 0) *e++ = '\\'; \
- *e = '\0'; \
-} while (/*CONSTCOND*/0)
+#define MAXEXTRAS 10
+
+#if !HAVE_NBTOOL_CONFIG_H
+#ifndef __NetBSD__
+/*
+ * On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
+ * integral type and is probably wrong, since currently the maximum
+ * number of bytes a character needs is 6. Until this is fixed, the
+ * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
+ * the assertion is commented out.
+ */
+#ifdef __FreeBSD__
+/*
+ * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
+ * mode.
+ */
+#ifndef CTASSERT
+#define CTASSERT(x) _CTASSERT(x, __LINE__)
+#define _CTASSERT(x, y) __CTASSERT(x, y)
+#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
+#endif
+#endif /* __FreeBSD__ */
+CTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
+#endif /* !__NetBSD__ */
+#endif
/*
* This is do_hvis, for HTTP style (RFC 1808)
*/
-static char *
-do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
-
- if ((isascii(c) && isalnum(c))
+ if (iswalnum(c)
/* safe */
- || c == '$' || c == '-' || c == '_' || c == '.' || c == '+'
+ || c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == L'+'
/* extra */
- || c == '!' || c == '*' || c == '\'' || c == '(' || c == ')'
- || c == ',') {
- dst = do_svis(dst, dlen, c, flag, nextc, extra);
- } else {
- if (dlen) {
- if (*dlen < 3)
- return NULL;
- *dlen -= 3;
- }
- *dst++ = '%';
+ || c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')'
+ || c == L',')
+ dst = do_svis(dst, c, flags, nextc, extra);
+ else {
+ *dst++ = L'%';
*dst++ = xtoa(((unsigned int)c >> 4) & 0xf);
*dst++ = xtoa((unsigned int)c & 0xf);
}
@@ -151,312 +156,448 @@ do_hvis(char *dst, size_t *dlen, int c,
* This is do_mvis, for Quoted-Printable MIME (RFC 2045)
* NB: No handling of long lines or CRLF.
*/
-static char *
-do_mvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
- if ((c != '\n') &&
+ if ((c != L'\n') &&
/* Space at the end of the line */
- ((isspace(c) && (nextc == '\r' || nextc == '\n')) ||
+ ((iswspace(c) && (nextc == L'\r' || nextc == L'\n')) ||
/* Out of range */
- (!isspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) ||
- /* Specific char to be escaped */
- strchr("#$@[\\]^`{|}~", c) != NULL)) {
- if (dlen) {
- if (*dlen < 3)
- return NULL;
- *dlen -= 3;
- }
- *dst++ = '=';
+ (!iswspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) ||
+ /* Specific char to be escaped */
+ wcschr(L"#$@[\\]^`{|}~", c) != NULL)) {
+ *dst++ = L'=';
*dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
*dst++ = XTOA((unsigned int)c & 0xf);
- } else {
- dst = do_svis(dst, dlen, c, flag, nextc, extra);
- }
+ } else
+ dst = do_svis(dst, c, flags, nextc, extra);
return dst;
}
/*
- * This is do_vis, the central code of vis.
- * dst: Pointer to the destination buffer
- * c: Character to encode
- * flag: Flag word
- * nextc: The character following 'c'
- * extra: Pointer to the list of extra characters to be
- * backslash-protected.
+ * Output single byte of multibyte character.
*/
-static char *
-do_svis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
{
- int isextra;
- size_t odlen = dlen ? *dlen : 0;
-
- isextra = strchr(extra, c) != NULL;
-#define HAVE(x) \
- do { \
- if (dlen) { \
- if (*dlen < (x)) \
- goto out; \
- *dlen -= (x); \
- } \
- } while (/*CONSTCOND*/0)
- if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) ||
- ((flag & VIS_SAFE) && issafe(c)))) {
- HAVE(1);
- *dst++ = c;
- return dst;
- }
- if (flag & VIS_CSTYLE) {
- HAVE(2);
+ if (flags & VIS_CSTYLE) {
switch (c) {
- case '\n':
- *dst++ = '\\'; *dst++ = 'n';
+ case L'\n':
+ *dst++ = L'\\'; *dst++ = L'n';
return dst;
- case '\r':
- *dst++ = '\\'; *dst++ = 'r';
+ case L'\r':
+ *dst++ = L'\\'; *dst++ = L'r';
return dst;
- case '\b':
- *dst++ = '\\'; *dst++ = 'b';
+ case L'\b':
+ *dst++ = L'\\'; *dst++ = L'b';
return dst;
case BELL:
- *dst++ = '\\'; *dst++ = 'a';
+ *dst++ = L'\\'; *dst++ = L'a';
return dst;
- case '\v':
- *dst++ = '\\'; *dst++ = 'v';
+ case L'\v':
+ *dst++ = L'\\'; *dst++ = L'v';
return dst;
- case '\t':
- *dst++ = '\\'; *dst++ = 't';
+ case L'\t':
+ *dst++ = L'\\'; *dst++ = L't';
return dst;
- case '\f':
- *dst++ = '\\'; *dst++ = 'f';
+ case L'\f':
+ *dst++ = L'\\'; *dst++ = L'f';
return dst;
- case ' ':
- *dst++ = '\\'; *dst++ = 's';
+ case L' ':
+ *dst++ = L'\\'; *dst++ = L's';
return dst;
- case '\0':
- *dst++ = '\\'; *dst++ = '0';
- if (isoctal(nextc)) {
- HAVE(2);
- *dst++ = '0';
- *dst++ = '0';
+ case L'\0':
+ *dst++ = L'\\'; *dst++ = L'0';
+ if (iswoctal(nextc)) {
+ *dst++ = L'0';
+ *dst++ = L'0';
}
return dst;
default:
- if (isgraph(c)) {
- *dst++ = '\\'; *dst++ = c;
+ if (iswgraph(c)) {
+ *dst++ = L'\\';
+ *dst++ = c;
return dst;
}
- if (dlen)
- *dlen = odlen;
}
}
- if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) {
- HAVE(4);
- *dst++ = '\\';
- *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0';
- *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0';
- *dst++ = (c & 07) + '0';
+ if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
+ *dst++ = L'\\';
+ *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
+ *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
+ *dst++ = (c & 07) + L'0';
} else {
- if ((flag & VIS_NOSLASH) == 0) {
- HAVE(1);
- *dst++ = '\\';
- }
+ if ((flags & VIS_NOSLASH) == 0)
+ *dst++ = L'\\';
if (c & 0200) {
- HAVE(1);
- c &= 0177; *dst++ = 'M';
+ c &= 0177;
+ *dst++ = L'M';
}
- if (iscntrl(c)) {
- HAVE(2);
- *dst++ = '^';
+ if (iswcntrl(c)) {
+ *dst++ = L'^';
if (c == 0177)
- *dst++ = '?';
+ *dst++ = L'?';
else
- *dst++ = c + '@';
+ *dst++ = c + L'@';
} else {
- HAVE(2);
- *dst++ = '-'; *dst++ = c;
+ *dst++ = L'-';
+ *dst++ = c;
}
}
+
+ return dst;
+}
+
+/*
+ * This is do_vis, the central code of vis.
+ * dst: Pointer to the destination buffer
+ * c: Character to encode
+ * flags: Flags word
+ * nextc: The character following 'c'
+ * extra: Pointer to the list of extra characters to be
+ * backslash-protected.
+ */
+static wchar_t *
+do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
+{
+ int iswextra, i, shft;
+ uint64_t bmsk, wmsk;
+
+ iswextra = wcschr(extra, c) != NULL;
+ if (!iswextra && (iswgraph(c) || iswwhite(c) ||
+ ((flags & VIS_SAFE) && iswsafe(c)))) {
+ *dst++ = c;
+ return dst;
+ }
+
+ /* See comment in istrsenvisx() output loop, below. */
+ wmsk = 0;
+ for (i = sizeof(wmsk) - 1; i >= 0; i--) {
+ shft = i * NBBY;
+ bmsk = (uint64_t)0xffLL << shft;
+ wmsk |= bmsk;
+ if ((c & wmsk) || i == 0)
+ dst = do_mbyte(dst, (wint_t)(
+ (uint64_t)(c & bmsk) >> shft),
+ flags, nextc, iswextra);
+ }
+
return dst;
-out:
- *dlen = odlen;
- return NULL;
}
-typedef char *(*visfun_t)(char *, size_t *, int, int, int, const char *);
+typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
/*
* Return the appropriate encoding function depending on the flags given.
*/
static visfun_t
-getvisfun(int flag)
+getvisfun(int flags)
{
- if (flag & VIS_HTTPSTYLE)
+ if (flags & VIS_HTTPSTYLE)
return do_hvis;
- if (flag & VIS_MIMESTYLE)
+ if (flags & VIS_MIMESTYLE)
return do_mvis;
return do_svis;
}
/*
- * isnvis - visually encode characters, also encoding the characters
- * pointed to by `extra'
+ * Expand list of extra characters to not visually encode.
*/
-static char *
-isnvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+makeextralist(int flags, const char *src)
{
- char *nextra = NULL;
- visfun_t f;
+ wchar_t *dst, *d;
+ size_t len;
- _DIAGASSERT(dst != NULL);
- _DIAGASSERT(extra != NULL);
- MAKEEXTRALIST(flag, nextra, extra);
- if (!nextra) {
- if (dlen && *dlen == 0) {
- errno = ENOSPC;
- return NULL;
- }
- *dst = '\0'; /* can't create nextra, return "" */
- return dst;
- }
- f = getvisfun(flag);
- dst = (*f)(dst, dlen, c, flag, nextc, nextra);
- free(nextra);
- if (dst == NULL || (dlen && *dlen == 0)) {
- errno = ENOSPC;
+ len = strlen(src);
+ if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
return NULL;
- }
- *dst = '\0';
- return dst;
-}
-char *
-svis(char *dst, int c, int flag, int nextc, const char *extra)
-{
- return isnvis(dst, NULL, c, flag, nextc, extra);
-}
+ if (mbstowcs(dst, src, len) == (size_t)-1) {
+ size_t i;
+ for (i = 0; i < len; i++)
+ dst[i] = (wint_t)(u_char)src[i];
+ d = dst + len;
+ } else
+ d = dst + wcslen(dst);
+
+ if (flags & VIS_GLOB) {
+ *d++ = L'*';
+ *d++ = L'?';
+ *d++ = L'[';
+ *d++ = L'#';
+ }
+
+ if (flags & VIS_SP) *d++ = L' ';
+ if (flags & VIS_TAB) *d++ = L'\t';
+ if (flags & VIS_NL) *d++ = L'\n';
+ if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
+ *d = L'\0';
-char *
-snvis(char *dst, size_t dlen, int c, int flag, int nextc, const char *extra)
-{
- return isnvis(dst, &dlen, c, flag, nextc, extra);
+ return dst;
}
-
/*
- * strsvis, strsvisx - visually encode characters from src into dst
- *
- * Extra is a pointer to a \0-terminated list of characters to
- * be encoded, too. These functions are useful e. g. to
- * encode strings in such a way so that they are not interpreted
- * by a shell.
- *
- * Dst must be 4 times the size of src to account for possible
- * expansion. The length of dst, not including the trailing NULL,
- * is returned.
- *
- * Strsvisx encodes exactly len bytes from src into dst.
- * This is useful for encoding a block of data.
+ * istrsenvisx()
+ * The main internal function.
+ * All user-visible functions call this one.
*/
static int
-istrsnvis(char *dst, size_t *dlen, const char *csrc, int flag, const char *extra)
+istrsenvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
+ int flags, const char *mbextra, int *cerr_ptr)
{
- int c;
- char *start;
- char *nextra = NULL;
- const unsigned char *src = (const unsigned char *)csrc;
+ wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
+ size_t len, olen;
+ uint64_t bmsk, wmsk;
+ wint_t c;
visfun_t f;
+ int clen = 0, cerr = 0, error = -1, i, shft;
+ ssize_t mbslength, maxolen;
- _DIAGASSERT(dst != NULL);
- _DIAGASSERT(src != NULL);
- _DIAGASSERT(extra != NULL);
- MAKEEXTRALIST(flag, nextra, extra);
- if (!nextra) {
- *dst = '\0'; /* can't create nextra, return "" */
- return 0;
+ _DIAGASSERT(mbdst != NULL);
+ _DIAGASSERT(mbsrc != NULL);
+ _DIAGASSERT(mbextra != NULL);
+
+ /*
+ * Input (mbsrc) is a char string considered to be multibyte
+ * characters. The input loop will read this string pulling
+ * one character, possibly multiple bytes, from mbsrc and
+ * converting each to wchar_t in src.
+ *
+ * The vis conversion will be done using the wide char
+ * wchar_t string.
+ *
+ * This will then be converted back to a multibyte string to
+ * return to the caller.
+ */
+
+ /* Allocate space for the wide char strings */
+ psrc = pdst = extra = NULL;
+ if (!mblength)
+ mblength = strlen(mbsrc);
+ if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
+ return -1;
+ if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
+ goto out;
+ dst = pdst;
+ src = psrc;
+
+ /* Use caller's multibyte conversion error flag. */
+ if (cerr_ptr)
+ cerr = *cerr_ptr;
+
+ /*
+ * Input loop.
+ * Handle up to mblength characters (not bytes). We do not
+ * stop at NULs because we may be processing a block of data
+ * that includes NULs.
+ */
+ mbslength = (ssize_t)mblength;
+ /*
+ * When inputing a single character, must also read in the
+ * next character for nextc, the look-ahead character.
+ */
+ if (mbslength == 1)
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable-9
mailing list