git: 9971e6aff1be - main - vt: Improve multi lingual word separation.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 27 Jun 2022 08:18:08 UTC
The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=9971e6aff1bef3d456172c41a3df3ce7266517cf

commit 9971e6aff1bef3d456172c41a3df3ce7266517cf
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-06-25 09:17:44 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-06-27 08:17:16 +0000

    vt: Improve multi lingual word separation.

    Suggested by: Tomoaki AOKI <junchoon@dec.sakura.ne.jp>
    Differential Revision: https://reviews.freebsd.org/D35552
    PR: 263084
    MFC after: 1 week
    Sponsored by: NVIDIA Networking
---
 sys/dev/vt/vt_buf.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/sys/dev/vt/vt_buf.c b/sys/dev/vt/vt_buf.c
index fa6c7c8fec5f..b83db85f1cdb 100644
--- a/sys/dev/vt/vt_buf.c
+++ b/sys/dev/vt/vt_buf.c
@@ -747,6 +747,29 @@ vtbuf_get_marked_len(struct vt_buf *vb)
 	return (sz * sizeof(term_char_t));
 }
 
+static bool
+tchar_is_word_separator(term_char_t ch)
+{
+	/* List of unicode word separator characters: */
+	switch (TCHAR_CHARACTER(ch)) {
+	case 0x0020: /* SPACE */
+	case 0x180E: /* MONGOLIAN VOWEL SEPARATOR */
+	case 0x2002: /* EN SPACE (nut) */
+	case 0x2003: /* EM SPACE (mutton) */
+	case 0x2004: /* THREE-PER-EM SPACE (thick space) */
+	case 0x2005: /* FOUR-PER-EM SPACE (mid space) */
+	case 0x2006: /* SIX-PER-EM SPACE */
+	case 0x2008: /* PUNCTUATION SPACE */
+	case 0x2009: /* THIN SPACE */
+	case 0x200A: /* HAIR SPACE */
+	case 0x200B: /* ZERO WIDTH SPACE */
+	case 0x3000: /* IDEOGRAPHIC SPACE */
+		return (true);
+	default:
+		return (false);
+	}
+}
+
 void
 vtbuf_extract_marked(struct vt_buf *vb, term_char_t *buf, int sz)
 {
@@ -779,7 +802,7 @@ vtbuf_extract_marked(struct vt_buf *vb, term_char_t *buf, int sz)
 		if (r != e.tp_row) {
 			/* Trim trailing word separators, if any. */
 			for (; i != j; i--) {
-				if (TCHAR_CHARACTER(buf[i - 1]) != ' ')
+				if (!tchar_is_word_separator(buf[i - 1]))
 					break;
 			}
 			/* Add newline character as expected by TTY. */
@@ -824,7 +847,7 @@ vtbuf_set_mark(struct vt_buf *vb, int type, int col, int row)
 		    vtbuf_wth(vb, row);
 		r = vb->vb_rows[vb->vb_mark_start.tp_row];
 		for (i = col; i >= 0; i --) {
-			if (TCHAR_CHARACTER(r[i]) == ' ') {
+			if (tchar_is_word_separator(r[i])) {
 				vb->vb_mark_start.tp_col = i + 1;
 				break;
 			}
@@ -833,7 +856,7 @@ vtbuf_set_mark(struct vt_buf *vb, int type, int col, int row)
 		if (i == -1)
 			vb->vb_mark_start.tp_col = 0;
 		for (i = col; i < vb->vb_scr_size.tp_col; i++) {
-			if (TCHAR_CHARACTER(r[i]) == ' ') {
+			if (tchar_is_word_separator(r[i])) {
 				vb->vb_mark_end.tp_col = i;
 				break;
 			}