From ac9b7c934c2ec48de923505a5018cb03d56c11d2 Mon Sep 17 00:00:00 2001
From: Toomas Soome
Date: Tue, 10 Jan 2017 14:15:43 +0200
Subject: [PATCH] 7760 uts: tem should be able to display UTF-8

Reviewed by: Marcel Telka
Reviewed by: Yuri Pankov
Approved by: Robert Mustacchi
---
Notes (free-form commentary; not part of the commit message):

 * tem_safe_terminal_emulate() now hands every input byte to the new
   tem_safe_input_byte() instead of calling tem_safe_parse() directly.
 * tem_safe_input_byte() collects a 2- to 4-byte UTF-8 sequence in
   tvs_utf8_partial (lead byte first, each later byte shifted in from the
   right); tvs_utf8_left counts the continuation bytes still expected.
   When the count reaches zero the bytes are decoded to one value, and any
   value above 0xff is handed to tem_safe_parse() as '?'.
 * tem_safe_input_partial() flushes an unfinished sequence: every non-zero
   byte held in tvs_utf8_partial is passed to tem_safe_parse() unchanged,
   then both state fields are cleared.  It runs whenever a byte that is
   not a continuation byte arrives.
 * A continuation byte (10xxxxxx) that arrives while tvs_utf8_left is zero
   is passed to tem_safe_parse() unchanged.
 * tem_internal_init() zeroes both new fields of struct tem_vt_state.
 * NOTE(review): the decoder tests bit patterns only; the decoded value is
   not checked against the minimum for its sequence length, so overlong
   encodings appear to be accepted -- confirm whether that is intended.

 usr/src/uts/common/io/tem.c       |   3 +
 usr/src/uts/common/io/tem_safe.c  | 114 +++++++++++++++++++++++++++++++++++++-
 usr/src/uts/common/sys/tem_impl.h |   4 ++
 3 files changed, 120 insertions(+), 1 deletion(-)

diff --git a/usr/src/uts/common/io/tem.c b/usr/src/uts/common/io/tem.c
index c4f26a4426..75e75d39b6 100644
--- a/usr/src/uts/common/io/tem.c
+++ b/usr/src/uts/common/io/tem.c
@@ -242,6 +242,9 @@ tem_internal_init(struct tem_vt_state *ptem, cred_t *credp,
 	tem_safe_reset_display(ptem, credp, CALLED_FROM_NORMAL,
 	    clear_screen, init_color);
 
+	ptem->tvs_utf8_left = 0;
+	ptem->tvs_utf8_partial = 0;
+
 	tem_safe_get_color(ptem, &fg, &bg, TEM_ATTR_SCREEN_REVERSE);
 	for (i = 0; i < height; i++)
 		for (j = 0; j < width; j++) {
diff --git a/usr/src/uts/common/io/tem_safe.c b/usr/src/uts/common/io/tem_safe.c
index 3540cb085d..0312884465 100644
--- a/usr/src/uts/common/io/tem_safe.c
+++ b/usr/src/uts/common/io/tem_safe.c
@@ -244,6 +244,118 @@ tem_safe_polled_write(
 	tem_safe_terminal_emulate(tem, buf, len, NULL, CALLED_FROM_STANDALONE);
 }
 
+/* Process partial UTF-8 sequence. */
+static void
+tem_safe_input_partial(struct tem_vt_state *tem, cred_t *credp,
+    enum called_from called_from)
+{
+	int i;
+	uint8_t c;
+
+	if (tem->tvs_utf8_left == 0)
+		return;
+
+	for (i = 0; i < sizeof (tem->tvs_utf8_partial); i++) {
+		c = (tem->tvs_utf8_partial >> (24 - (i << 3))) & 0xff;
+		if (c != 0) {
+			tem_safe_parse(tem, c, credp, called_from);
+		}
+	}
+	tem->tvs_utf8_left = 0;
+	tem->tvs_utf8_partial = 0;
+}
+
+/*
+ * Handle UTF-8 sequences.
+ */
+static void
+tem_safe_input_byte(struct tem_vt_state *tem, uchar_t c, cred_t *credp,
+    enum called_from called_from)
+{
+	/*
+	 * Check for UTF-8 code points. In case of error fall back to
+	 * 8-bit code. As we only have 8859-1 fonts for console, this will set
+	 * the limits on what chars we actually can display, therefore we
+	 * have to return to this code once we have solved the font issue.
+	 */
+	if ((c & 0x80) == 0x00) {
+		/* One-byte sequence. */
+		tem_safe_input_partial(tem, credp, called_from);
+		tem_safe_parse(tem, c, credp, called_from);
+		return;
+	}
+	if ((c & 0xe0) == 0xc0) {
+		/* Two-byte sequence. */
+		tem_safe_input_partial(tem, credp, called_from);
+		tem->tvs_utf8_left = 1;
+		tem->tvs_utf8_partial = c;
+		return;
+	}
+	if ((c & 0xf0) == 0xe0) {
+		/* Three-byte sequence. */
+		tem_safe_input_partial(tem, credp, called_from);
+		tem->tvs_utf8_left = 2;
+		tem->tvs_utf8_partial = c;
+		return;
+	}
+	if ((c & 0xf8) == 0xf0) {
+		/* Four-byte sequence. */
+		tem_safe_input_partial(tem, credp, called_from);
+		tem->tvs_utf8_left = 3;
+		tem->tvs_utf8_partial = c;
+		return;
+	}
+	if ((c & 0xc0) == 0x80) {
+		/* Invalid state? */
+		if (tem->tvs_utf8_left == 0) {
+			tem_safe_parse(tem, c, credp, called_from);
+			return;
+		}
+		tem->tvs_utf8_left--;
+		tem->tvs_utf8_partial = (tem->tvs_utf8_partial << 8) | c;
+		if (tem->tvs_utf8_left == 0) {
+			tem_char_t v, u;
+			uint8_t b;
+
+			/*
+			 * Transform the sequence of 2 to 4 bytes to
+			 * unicode number.
+			 */
+			v = 0;
+			u = tem->tvs_utf8_partial;
+			b = (u >> 24) & 0xff;
+			if (b != 0) {		/* Four-byte sequence */
+				v = b & 0x07;
+				b = (u >> 16) & 0xff;
+				v = (v << 6) | (b & 0x3f);
+				b = (u >> 8) & 0xff;
+				v = (v << 6) | (b & 0x3f);
+				b = u & 0xff;
+				v = (v << 6) | (b & 0x3f);
+			} else if ((b = (u >> 16) & 0xff) != 0) {
+				v = b & 0x0f;	/* Three-byte sequence */
+				b = (u >> 8) & 0xff;
+				v = (v << 6) | (b & 0x3f);
+				b = u & 0xff;
+				v = (v << 6) | (b & 0x3f);
+			} else if ((b = (u >> 8) & 0xff) != 0) {
+				v = b & 0x1f;	/* Two-byte sequence */
+				b = u & 0xff;
+				v = (v << 6) | (b & 0x3f);
+			}
+
+			/* Use '?' as replacement if needed. */
+			if (v > 0xff)
+				v = '?';
+			tem_safe_parse(tem, v, credp, called_from);
+			tem->tvs_utf8_partial = 0;
+		}
+		return;
+	}
+	/* Anything left is illegal in UTF-8 sequence. */
+	tem_safe_input_partial(tem, credp, called_from);
+	tem_safe_parse(tem, c, credp, called_from);
+}
 
 /*
  * This is the main entry point into the terminal emulator.
@@ -270,7 +382,7 @@ tem_safe_terminal_emulate(
 		    VIS_HIDE_CURSOR, credp, called_from);
 
 	for (; len > 0; len--, buf++)
-		tem_safe_parse(tem, *buf, credp, called_from);
+		tem_safe_input_byte(tem, *buf, credp, called_from);
 
 	/*
 	 * Send the data we just got to the framebuffer.
diff --git a/usr/src/uts/common/sys/tem_impl.h b/usr/src/uts/common/sys/tem_impl.h
index a5a65e4a52..181437570e 100644
--- a/usr/src/uts/common/sys/tem_impl.h
+++ b/usr/src/uts/common/sys/tem_impl.h
@@ -107,6 +107,7 @@ extern "C" {
 
 #define	BUF_LEN		160 /* Two lines of data can be processed at a time */
 
+typedef uint32_t tem_char_t;	/* 32bit char to support UTF-8 */
 typedef uint8_t text_color_t;
 
 typedef struct tem_color {
@@ -176,6 +177,9 @@ struct tem_vt_state {
 	text_color_t	*tvs_bg_buf;	/* bg_color attribute cache */
 	int		tvs_color_buf_size;
 
+	unsigned	tvs_utf8_left;		/* UTF-8 code points */
+	tem_char_t	tvs_utf8_partial;	/* UTF-8 char being completed */
+
 	boolean_t	tvs_isactive;
 	int		tvs_initialized;	/* initialization flag */
 
-- 
2.11.4.GIT