@@ -1,6 +1,7 @@
#include "term.h"
#include "config.h"
#include "utils.h"
+ #include "utf8.h"
#include <stdlib.h>
#include <string.h>
@@ -223,7 +224,9 @@
{
if (IS_SET(term->mode, MODE_UTF8) && !IS_SET(term->mode, MODE_SIXEL))
{
- // UTF-8
+ charsize = utf8_decode(buf + n, &u, len - n);
+ if (charsize == 0)
+ break;
}
else
{
@@ -0,0 +1,67 @@
+ #include <assert.h>
+
+ #include "utf8.h"
+ #include "utils.h"
+
+ #define UTF_INVALID 0xdeadbeef
+ #define UTF_SIZ 4
+
+ static const uint8_t __utf_mask[] = { 0xC0, 0x80, 0xE0, 0xF0, 0xF8};
+ static const uint8_t __utf_byte[] = { 0X80, 0, 0XC0, 0XE0, 0XF0};
+ static const uint_least32_t __utf_min[] = { 0, 0, 0x80, 0x800, 0x10000};
+ static const uint_least32_t __utf_max[] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
+
+ uint_least32_t utf8_decode_byte(char c, size_t *i)
+ {
+ assert(i != NULL);
+
+ for (*i = 0; *i < sizeof(__utf_mask); (*i)++)
+ {
+ if ((c & __utf_mask[*i]) == __utf_byte[*i])
+ return c & ~__utf_mask[*i];
+ }
+
+ return 0;
+ }
+
+ size_t utf8_decode(const char *c, uint_least32_t *u, size_t clen)
+ {
+ size_t j = 1;
+ size_t len;
+ uint_least32_t u_decoded;
+
+ *u = UTF_INVALID;
+ if (clen == 0)
+ return 0;
+
+ u_decoded = utf8_decode_byte(c[0], &len);
+ if (!BETWEEN(len, 1, UTF_SIZ))
+ return 1;
+
+ for (size_t i = 1; i < clen && j < len; i++, j++)
+ {
+ size_t type;
+
+ u_decoded = (u_decoded << 6) | utf8_decode_byte(c[i], &type);
+ if (type != 0)
+ return j;
+ }
+
+ if (j < len)
+ return 0;
+
+ *u = u_decoded;
+ utf8_validate(u, len);
+ return len;
+ }
+
+ size_t utf8_validate(uint_least32_t *u, size_t i)
+ {
+ if (!BETWEEN(*u, __utf_min[i], __utf_max[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
+ *u = UTF_INVALID;
+
+ for (i = 1; *u > __utf_max[i]; i++)
+ ;
+
+ return i;
+ }
@@ -0,0 +1,11 @@
#ifndef UTF8_H
#define UTF8_H

#include <stddef.h> /* size_t — stddef.h is the minimal header for it (was stdlib.h) */
#include <stdint.h> /* uint_least32_t */

/* Decode one UTF-8 sequence from at most `clen` bytes of `c` into *u.
 * Returns the number of bytes consumed; 0 means "need more input"
 * (empty or truncated sequence).  Malformed input sets *u to the
 * replacement codepoint and consumes at least one byte. */
size_t utf8_decode(const char *c, uint_least32_t *u, size_t clen);

/* Clamp *u to the replacement codepoint when it is not valid for an
 * `i`-byte encoding; returns the minimal encoded length of the final *u. */
size_t utf8_validate(uint_least32_t *u, size_t i);

/* Classify one byte (*i: 0 continuation, 1 ASCII, 2..4 lead byte,
 * 5 invalid) and return its payload bits. */
uint_least32_t utf8_decode_byte(char c, size_t *i);

#endif /* UTF8_H */