socsvn commit: r225111 - in soc2011/zy/nvi-iconv/head:
contrib/nvi2/common contrib/nvi2/ex contrib/nvi2/vi usr.bin/vi
zy at FreeBSD.org
zy at FreeBSD.org
Sun Aug 14 14:27:56 UTC 2011
Author: zy
Date: Sun Aug 14 14:27:54 2011
New Revision: 225111
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=225111
Log:
Updates to git:0c15828; this version features:
* Stable :vsplit support;
* Standard and general file encoding detection (UTF-8 & UTF-16);
* UTF-16 support.
The patch is ready for testing in the community.
Added:
soc2011/zy/nvi-iconv/head/contrib/nvi2/common/encoding.c
Modified:
soc2011/zy/nvi-iconv/head/contrib/nvi2/common/conv.c
soc2011/zy/nvi-iconv/head/contrib/nvi2/common/exf.c
soc2011/zy/nvi-iconv/head/contrib/nvi2/common/exf.h
soc2011/zy/nvi-iconv/head/contrib/nvi2/common/extern.h
soc2011/zy/nvi-iconv/head/contrib/nvi2/common/line.c
soc2011/zy/nvi-iconv/head/contrib/nvi2/ex/ex.h
soc2011/zy/nvi-iconv/head/contrib/nvi2/ex/ex_write.c
soc2011/zy/nvi-iconv/head/contrib/nvi2/vi/vi.c
soc2011/zy/nvi-iconv/head/contrib/nvi2/vi/vs_refresh.c
soc2011/zy/nvi-iconv/head/usr.bin/vi/Makefile
Modified: soc2011/zy/nvi-iconv/head/contrib/nvi2/common/conv.c
==============================================================================
--- soc2011/zy/nvi-iconv/head/contrib/nvi2/common/conv.c Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/common/conv.c Sun Aug 14 14:27:54 2011 (r225111)
@@ -10,7 +10,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "$Id: conv.c,v 1.28 2011/07/16 14:40:06 zy Exp $ (Berkeley) $Date: 2011/07/16 14:40:06 $";
+static const char sccsid[] = "$Id: conv.c,v 1.29 2011/08/13 12:53:23 zy Exp $ (Berkeley) $Date: 2011/08/13 12:53:23 $";
#endif /* not lint */
#include <sys/types.h>
@@ -23,6 +23,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <strings.h>
#include <unistd.h>
#include "common.h"
@@ -89,6 +90,10 @@
default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
size_t *tolen, CHAR_T **dst, char *enc)
{
+ /* XXX UTF-16 linesep hack */
+ if (!strncasecmp(enc, "utf-16", 6) && len % 2)
+ len -= 1;
+
int i = 0, j;
CHAR_T **tostr = &cw->b_wc1;
size_t *blen = &cw->blen1;
Added: soc2011/zy/nvi-iconv/head/contrib/nvi2/common/encoding.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/common/encoding.c Sun Aug 14 14:27:54 2011 (r225111)
@@ -0,0 +1,162 @@
+/*-
+ * Copyright (c) 2011
+ * Zhihao Yuan. All rights reserved.
+ *
+ * See the LICENSE file for redistribution information.
+ */
+
+#ifndef lint
+static const char sccsid[] = "$Id: encoding.c,v 1.2 2011/08/13 22:58:03 zy Exp $ (Berkeley) $Date: 2011/08/13 22:58:03 $";
+#endif /* not lint */
+
+#include <sys/types.h>
+
+/*
+ * Byte classification table, indexed by an unsigned byte value.
+ * looks_utf8() and looks_utf16() only distinguish T (plain ASCII text)
+ * from everything else.  The table is read-only.
+ */
+#define F 0 /* character never appears in text */
+#define T 1 /* character appears in plain ASCII text */
+#define I 2 /* character appears in ISO-8859 text */
+#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
+
+static const char text_chars[256] = {
+ /* BEL BS HT LF FF CR */
+ F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
+ /* ESC */
+ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
+ /* NEL */
+ X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
+ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
+};
+
+/*
+ * looks_utf8 --
+ *	Decide whether some text looks like UTF-8.  Returns:
+ *
+ *	-1: invalid UTF-8
+ *	 0: uses odd control characters, so doesn't look like text
+ *	 1: 7-bit text
+ *	 2: definitely UTF-8 text (valid high-bit set bytes)
+ *
+ *	Based on RFC 3629.  A leading UTF-8 BOM gets no special treatment.
+ *	A multi-byte sequence cut off by the end of the buffer is not an
+ *	error (the buffer may be only a prefix of the file); it simply does
+ *	not count as evidence of UTF-8.
+ *
+ * PUBLIC: int looks_utf8 __P((const char *, size_t));
+ */
+int
+looks_utf8(const char *buf, size_t nbytes)
+{
+	size_t i;
+	int n;
+	int gotone = 0, ctrl = 0;
+
+	for (i = 0; i < nbytes; i++) {
+		u_char c = (u_char)buf[i];
+
+		if ((c & 0x80) == 0) {		/* 0xxxxxxx is plain ASCII */
+			/*
+			 * Even if the whole file is valid UTF-8 sequences,
+			 * still reject it if it uses weird control characters.
+			 */
+			if (text_chars[c] != T)
+				ctrl = 1;
+		} else if ((c & 0x40) == 0) {	/* 10xxxxxx never 1st byte */
+			return -1;
+		} else {			/* 11xxxxxx begins UTF-8 */
+			int following;
+
+			if ((c & 0x20) == 0) {		/* 110xxxxx */
+				/* C0 and C1 can only start overlong forms. */
+				if (c < 0xc2)
+					return -1;
+				following = 1;
+			} else if ((c & 0x10) == 0) {	/* 1110xxxx */
+				following = 2;
+			} else if ((c & 0x08) == 0) {	/* 11110xxx */
+				/* F5, F6, F7 would encode past U+10FFFF. */
+				if (c > 0xf4)
+					return -1;
+				following = 3;
+			} else {			/* F8..FF */
+				return -1;
+			}
+
+			for (n = 0; n < following; n++) {
+				i++;
+				if (i >= nbytes)
+					goto done;
+
+				/* Every continuation byte must be 10xxxxxx. */
+				if (((u_char)buf[i] & 0xc0) != 0x80)
+					return -1;
+			}
+
+			gotone = 1;
+		}
+	}
+done:
+	return ctrl ? 0 : (gotone ? 2 : 1);
+}
+
+/*
+ * looks_utf16 --
+ *	Decide whether some text looks like UTF-16.  Returns:
+ *
+ *	 0: invalid UTF-16
+ *	 1: Little-endian UTF-16
+ *	 2: Big-endian UTF-16
+ *
+ *	The buffer must start with a byte-order mark; the byte order is
+ *	taken from it.  A trailing odd byte is ignored, and a buffer that
+ *	ends right after a high surrogate is still accepted.
+ *
+ * PUBLIC: int looks_utf16 __P((const char *, size_t));
+ */
+int
+looks_utf16(const char *buf, size_t nbytes)
+{
+	int bigend;
+	size_t i;
+	unsigned int c;
+	int bom;
+	int following = 0;	/* set while a low surrogate is expected */
+
+	if (nbytes < 2)
+		return 0;
+
+	bom = ((u_char)buf[0] << 8) + (u_char)buf[1];
+	if (bom == 0xfffe)		/* bytes FF FE */
+		bigend = 0;
+	else if (bom == 0xfeff)		/* bytes FE FF */
+		bigend = 1;
+	else
+		return 0;
+
+	for (i = 2; i + 1 < nbytes; i += 2) {
+		if (bigend)
+			c = (u_char)buf[i + 1] + 256 * (u_char)buf[i];
+		else
+			c = (u_char)buf[i] + 256 * (u_char)buf[i + 1];
+
+		if (following) {
+			/* Second half of a pair must be a low surrogate. */
+			if (c < 0xDC00 || c > 0xDFFF)
+				return 0;
+			/* Pair complete: go back to expecting any unit. */
+			following = 0;
+			continue;
+		}
+
+		if (c < 0xD800 || c > 0xDFFF) {
+			/* Ordinary unit; reject odd ASCII control codes. */
+			if (c < 128 && text_chars[c] != T)
+				return 0;
+		} else if (c > 0xDBFF) {
+			/* Low surrogate without a preceding high one. */
+			return 0;
+		} else {
+			/* High surrogate: the next unit must complete it. */
+			following = 1;
+		}
+	}
+
+	return 1 + bigend;
+}
+
+#undef F
+#undef T
+#undef I
+#undef X
Modified: soc2011/zy/nvi-iconv/head/contrib/nvi2/common/exf.c
==============================================================================
--- soc2011/zy/nvi-iconv/head/contrib/nvi2/common/exf.c Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/common/exf.c Sun Aug 14 14:27:54 2011 (r225111)
@@ -10,7 +10,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "$Id: exf.c,v 10.52 2011/07/18 16:58:54 zy Exp $ (Berkeley) $Date: 2011/07/18 16:58:54 $";
+static const char sccsid[] = "$Id: exf.c,v 10.53 2011/07/20 00:38:28 zy Exp $ (Berkeley) $Date: 2011/07/20 00:38:28 $";
#endif /* not lint */
#include <sys/param.h>
@@ -39,6 +39,7 @@
static int file_backup __P((SCR *, char *, char *));
static void file_cinit __P((SCR *));
+static void file_encinit __P((SCR *));
static void file_comment __P((SCR *));
static int file_spath __P((SCR *, FREF *, struct stat *, int *));
@@ -404,12 +405,12 @@
sp->ep = ep;
sp->frp = frp;
+ /* Detect and set the file encoding */
+ file_encinit(sp);
+
/* Set the initial cursor position, queue initial command. */
file_cinit(sp);
- /* Report conversion errors again. */
- F_CLR(sp, SC_CONV_ERROR);
-
/* Redraw the screen from scratch, schedule a welcome message. */
F_SET(sp, SC_SCR_REFORMAT | SC_STATUS);
@@ -721,7 +722,7 @@
free(ep->rcv_path);
if (ep->rcv_mpath != NULL)
free(ep->rcv_mpath);
- if (ep->c_lp != NULL)
+ if (ep->c_blen > 0)
free(ep->c_lp);
free(ep);
@@ -1207,12 +1208,63 @@
}
if (estr)
msgq_str(sp, M_SYSERR, estr, "%s");
+ if (d != NULL)
+ free(d);
if (bp != NULL)
FREE_SPACE(sp, bp, blen);
return (1);
}
/*
+ * file_encinit --
+ * Sample the leading lines of the file (at most 4096 bytes) and set
+ * O_FILEENCODING when they look like UTF-8 or BOM-prefixed UTF-16.
+ */
+static void
+file_encinit(SCR *sp)
+{
+#if defined(USE_WIDECHAR) && defined(USE_ICONV)
+	size_t len;
+	char *p;
+	size_t blen = 0;
+	char buf[4096];	/* need not be '\0'-terminated */
+	recno_t ln = 1;
+
+	/*
+	 * Rebuild the leading bytes of the file from its raw lines,
+	 * re-inserting the '\n' separators, until the sample buffer is
+	 * full or there are no more lines.
+	 */
+	while (!db_rget(sp, ln++, &p, &len)) {
+		if (blen + len > sizeof(buf))
+			len = sizeof(buf) - blen;
+		memcpy(buf + blen, p, len);
+		blen += len;
+		if (blen == sizeof(buf))
+			break;
+		else
+			buf[blen++] = '\n';
+	}
+
+	if (looks_utf8(buf, blen) > 1)
+		o_set(sp, O_FILEENCODING, OS_STRDUP, "utf-8", 0);
+	else {
+		int st = looks_utf16(buf, blen);
+		if (st > 0) {
+			char *np;
+			size_t nblen, nlen;
+			int failed;
+
+			/* The BOM lives in the first two bytes of line 1. */
+			if (db_rget(sp, 1, &p, &len) || len < 2)
+				return;
+			nlen = len - 2;
+
+			/*
+			 * Keep the allocated size (nblen) apart from the
+			 * requested length (nlen) so the macro cannot
+			 * clobber the length we still need below.
+			 */
+			GET_SPACE_GOTOC(sp, np, nblen, nlen);
+			memcpy(sp->ep->_bom, p, 2);
+			memcpy(np, p + 2, nlen);
+			/* store w/o the BOM */
+			failed = db_rset(sp, 1, np, nlen);
+			/*
+			 * NOTE(review): assumes the DB put copies the
+			 * record, so the scratch space can go back now.
+			 */
+			FREE_SPACE(sp, np, nblen);
+			if (failed) {
+				/*
+				 * Line 1 still carries the BOM; forget the
+				 * saved copy so it is not written twice.
+				 */
+				sp->ep->bom = 0;
+				return;
+			}
+		}
+		if (st == 1)
+			o_set(sp, O_FILEENCODING, OS_STRDUP, "utf-16le", 0);
+		else if (st == 2)
+			o_set(sp, O_FILEENCODING, OS_STRDUP, "utf-16be", 0);
+	}
+	/* Fallback to locale encoding */
+alloc_err:;
+#endif
+}
+
+/*
* file_comment --
* Skip the first comment.
*/
Modified: soc2011/zy/nvi-iconv/head/contrib/nvi2/common/exf.h
==============================================================================
--- soc2011/zy/nvi-iconv/head/contrib/nvi2/common/exf.h Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/common/exf.h Sun Aug 14 14:27:54 2011 (r225111)
@@ -6,7 +6,7 @@
*
* See the LICENSE file for redistribution information.
*
- * @(#)exf.h 10.7 (Berkeley) 7/9/96
+ * $Id: exf.h,v 10.8 2011/08/13 17:59:41 zy Exp $ (Berkeley) $Date: 2011/08/13 17:59:41 $
*/
/* Undo direction. */
/*
@@ -17,6 +17,10 @@
int refcnt; /* Reference count. */
/* Underlying database state. */
+ union {
+ uint16_t bom; /* Byte-order-mark */
+ char _bom[2];
+ };
DB *db; /* File db structure. */
CHAR_T *c_lp; /* Cached line. */
size_t c_len; /* Cached line length. */
Modified: soc2011/zy/nvi-iconv/head/contrib/nvi2/common/extern.h
==============================================================================
--- soc2011/zy/nvi-iconv/head/contrib/nvi2/common/extern.h Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/common/extern.h Sun Aug 14 14:27:54 2011 (r225111)
@@ -7,6 +7,8 @@
void text_lfree __P((TEXTH *));
void text_free __P((TEXT *));
int del __P((SCR *, MARK *, MARK *, int));
+int looks_utf8 __P((const char *, size_t));
+int looks_utf16 __P((const char *, size_t));
FREF *file_add __P((SCR *, char *));
int file_init __P((SCR *, FREF *, char *, int));
int file_end __P((SCR *, EXF *, int));
@@ -34,6 +36,8 @@
int db_set __P((SCR *, recno_t, CHAR_T *, size_t));
int db_exist __P((SCR *, recno_t));
int db_last __P((SCR *, recno_t *));
+int db_rget __P((SCR *, recno_t, char **, size_t *));
+int db_rset __P((SCR *, recno_t, char *, size_t));
void db_err __P((SCR *, recno_t));
int log_init __P((SCR *, EXF *));
int log_end __P((SCR *, EXF *));
Modified: soc2011/zy/nvi-iconv/head/contrib/nvi2/common/line.c
==============================================================================
--- soc2011/zy/nvi-iconv/head/contrib/nvi2/common/line.c Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/common/line.c Sun Aug 14 14:27:54 2011 (r225111)
@@ -10,7 +10,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "$Id: line.c,v 10.24 2011/07/18 16:10:48 zy Exp $ (Berkeley) $Date: 2011/07/18 16:10:48 $";
+static const char sccsid[] = "$Id: line.c,v 10.26 2011/08/12 12:36:41 zy Exp $ (Berkeley) $Date: 2011/08/12 12:36:41 $";
#endif /* not lint */
#include <sys/types.h>
@@ -580,6 +580,72 @@
}
/*
+ * db_rget --
+ * Retrieve a raw line from database. No cache, no conversion.
+ *
+ * PUBLIC: int db_rget __P((SCR *, recno_t, char **, size_t *));
+ */
+int
+db_rget(
+	SCR *sp,
+	recno_t lno,		/* Line number. */
+	char **pp,		/* Pointer store. */
+	size_t *lenp)		/* Length store. */
+{
+	DBT rec, recno;
+	EXF *exfp;
+
+	/* Nothing to read when the screen has no underlying file. */
+	exfp = sp->ep;
+	if (exfp == NULL)
+		return (1);
+
+	/* Look the record up by its line number. */
+	recno.data = &lno;
+	recno.size = sizeof(lno);
+	if (exfp->db->get(exfp->db, &recno, &rec, 0) != 0) {
+		/* We do not report error, and do not ensure the size! */
+		return (1);
+	}
+
+	/* Hand back only what the caller asked for. */
+	if (pp != NULL)
+		*pp = rec.data;
+	if (lenp != NULL)
+		*lenp = rec.size;
+	return (0);
+}
+
+/*
+ * db_rset --
+ * Store a line in the file. No log, no conversion.
+ *
+ * PUBLIC: int db_rset __P((SCR *, recno_t, char *, size_t));
+ */
+int
+db_rset(
+	SCR *sp,
+	recno_t lno,
+	char *p,
+	size_t len)
+{
+	DBT rec, recno;
+	EXF *exfp;
+
+	/* Nothing to update when the screen has no underlying file. */
+	exfp = sp->ep;
+	if (exfp == NULL)
+		return (1);
+
+	/* Describe the record: keyed by line number, raw bytes as data. */
+	recno.data = &lno;
+	recno.size = sizeof(lno);
+	rec.data = p;
+	rec.size = len;
+
+	/* We do not report error, and do not ensure the size! */
+	return (exfp->db->put(exfp->db, &recno, &rec, 0) == -1 ? 1 : 0);
+}
+
+/*
* db_err --
* Report a line error.
*
Modified: soc2011/zy/nvi-iconv/head/contrib/nvi2/ex/ex.h
==============================================================================
--- soc2011/zy/nvi-iconv/head/contrib/nvi2/ex/ex.h Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/ex/ex.h Sun Aug 14 14:27:54 2011 (r225111)
@@ -133,7 +133,7 @@
#define E_C_PRINT 0x01000 /* p flag. */
u_int16_t iflags; /* User input information. */
-#define __INUSE2 0x000007ff /* Same name space as EXCMDLIST. */
+#define __INUSE2 0x000004ff /* Same name space as EXCMDLIST. */
#define E_BLIGNORE 0x00000800 /* Ignore blank lines. */
#define E_NAMEDISCARD 0x00001000 /* Free/discard the name. */
#define E_NOAUTO 0x00002000 /* Don't do autoprint output. */
Modified: soc2011/zy/nvi-iconv/head/contrib/nvi2/ex/ex_write.c
==============================================================================
--- soc2011/zy/nvi-iconv/head/contrib/nvi2/ex/ex_write.c Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/ex/ex_write.c Sun Aug 14 14:27:54 2011 (r225111)
@@ -10,7 +10,7 @@
#include "config.h"
#ifndef lint
-static const char sccsid[] = "$Id: ex_write.c,v 10.38 2001/06/25 15:19:22 skimo Exp $ (Berkeley) $Date: 2001/06/25 15:19:22 $";
+static const char sccsid[] = "$Id: ex_write.c,v 10.39 2011/08/13 18:28:15 zy Exp $ (Berkeley) $Date: 2011/08/13 18:28:15 $";
#endif /* not lint */
#include <sys/types.h>
@@ -25,6 +25,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <strings.h>
#include <unistd.h>
#include "../common/common.h"
@@ -287,6 +288,7 @@
CHAR_T *p;
char *f;
size_t flen;
+ int isutf16;
gp = sp->gp;
fline = fm->lno;
@@ -315,7 +317,12 @@
ccnt = 0;
lcnt = 0;
msg = "253|Writing...";
- if (tline != 0)
+
+ isutf16 = !strncasecmp(O_STR(sp, O_FILEENCODING), "utf-16", 6);
+
+ if (tline != 0) {
+ if (sp->ep->bom && fwrite(&sp->ep->bom, 2, 1, fp) != 1)
+ goto err;
for (; fline <= tline; ++fline, ++lcnt) {
/* Caller has to provide any interrupt message. */
if ((lcnt + 1) % INTERRUPT_CHECK == 0) {
@@ -333,10 +340,13 @@
if (fwrite(f, 1, flen, fp) != flen)
goto err;
ccnt += len;
+ if (isutf16 && putc('\0', fp) != '\0')
+ break; /* UTF-16 uses '000a' as EOL */
if (putc('\n', fp) != '\n')
break;
++ccnt;
}
+ }
if (fflush(fp))
goto err;
Modified: soc2011/zy/nvi-iconv/head/contrib/nvi2/vi/vi.c
==============================================================================
--- soc2011/zy/nvi-iconv/head/contrib/nvi2/vi/vi.c Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/vi/vi.c Sun Aug 14 14:27:54 2011 (r225111)
@@ -403,6 +403,7 @@
if (F_ISSET(gp, G_SRESTART) || F_ISSET(sp, SC_EX)) {
*spp = sp;
v_dtoh(sp);
+ gp->scr_discard(sp, NULL);
break;
}
}
@@ -1015,6 +1016,9 @@
}
CIRCLEQ_REMOVE(&gp->dq, tsp, q);
CIRCLEQ_INSERT_TAIL(&gp->hq, tsp, q);
+ /* XXXX Change if hidden screens per window */
+ tsp->gp = 0;
+ gp->scr_discard(tsp, NULL);
}
/* Move current screen back to the display queue. */
Modified: soc2011/zy/nvi-iconv/head/contrib/nvi2/vi/vs_refresh.c
==============================================================================
--- soc2011/zy/nvi-iconv/head/contrib/nvi2/vi/vs_refresh.c Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/contrib/nvi2/vi/vs_refresh.c Sun Aug 14 14:27:54 2011 (r225111)
@@ -345,7 +345,7 @@
tmp.lno = LNO;
tmp.coff = HMAP->coff;
tmp.soff = 1;
- lcnt = vs_sm_nlines(sp, &tmp, lastline+1, sp->t_rows);
+ lcnt = vs_sm_nlines(sp, &tmp, lastline, sp->t_rows);
if (lcnt < HALFTEXT(sp)) {
if (vs_sm_fill(sp, lastline, P_BOTTOM))
return (1);
Modified: soc2011/zy/nvi-iconv/head/usr.bin/vi/Makefile
==============================================================================
--- soc2011/zy/nvi-iconv/head/usr.bin/vi/Makefile Sun Aug 14 13:37:38 2011 (r225110)
+++ soc2011/zy/nvi-iconv/head/usr.bin/vi/Makefile Sun Aug 14 14:27:54 2011 (r225111)
@@ -57,8 +57,9 @@
SRCS+= cl_bsd.c cl_funcs.c cl_main.c cl_read.c cl_screen.c cl_term.c
# General sources.
-SRCS+= cut.c conv.c delete.c exf.c key.c line.c log.c main.c mark.c msg.c \
- options.c options_f.c put.c screen.c search.c seq.c recover.c util.c
+SRCS+= cut.c conv.c delete.c encoding.c exf.c key.c line.c log.c main.c \
+ mark.c msg.c options.c options_f.c put.c screen.c search.c seq.c \
+ recover.c util.c
# Ex source.
SRCS+= ex.c ex_abbrev.c ex_append.c ex_args.c ex_argv.c ex_at.c ex_bang.c \
More information about the svn-soc-all
mailing list