356693dd2dc9b194791e10848aa7b6dcf2bec784 — Ne02ptzero 1 year, 5 months ago 446e94e
NEW: Beginning of UTF8 support in the read part of the TTY

Signed-off-by: Ne02ptzero <louis@ne02ptzero.me>
3 files changed, 82 insertions(+), 1 deletions(-)

M term.c
A utf8.c
A utf8.h
M term.c => term.c +4 -1
@@ 1,6 1,7 @@ #include "term.h"
  #include "config.h"
  #include "utils.h"
+ #include "utf8.h"
  
  #include <stdlib.h>
  #include <string.h>


@@ 223,7 224,9 @@ {
          if (IS_SET(term->mode, MODE_UTF8) && !IS_SET(term->mode, MODE_SIXEL))
          {
-             // UTF-8
+             charsize = utf8_decode(buf + n, &u, len - n);
+             if (charsize == 0)
+                 break;
          }
          else
          {

A utf8.c => utf8.c +67 -0
@@ 0,0 1,67 @@
+ #include <assert.h>
+ 
+ #include "utf8.h"
+ #include "utils.h"
+ 
+ /* Value written to the output code point when decoding or validation fails. */
+ #define UTF_INVALID 0xdeadbeef
+ /* Largest lead-byte class accepted by utf8_decode, i.e. the longest sequence in bytes. */
+ #define UTF_SIZ 4
+ 
+ /*
+  * Parallel tables indexed by byte class: index 0 describes a continuation
+  * byte (10xxxxxx), indices 1..4 describe the lead byte of a 1..4 byte sequence.
+  *   __utf_mask / __utf_byte: a byte is of class k when (byte & mask[k]) == byte[k];
+  *                            the bits outside the mask are the payload.
+  *   __utf_min  / __utf_max : inclusive code point range utf8_validate accepts
+  *                            for class k.
+  * NOTE(review): identifiers starting with "__" are reserved for the C
+  * implementation; consider renaming these tables in a follow-up.
+  */
+ static const uint8_t        __utf_mask[] = {    0xC0, 0x80,  0xE0,   0xF0,     0xF8};
+ static const uint8_t        __utf_byte[] = {    0X80,    0,  0XC0,   0XE0,     0XF0};
+ static const uint_least32_t __utf_min[]  = {       0,    0,  0x80,  0x800,  0x10000};
+ static const uint_least32_t __utf_max[]  = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
+ 
+ /**
+  * Classify one byte of a UTF-8 stream and extract its payload bits.
+  *
+  * @param c  Byte to inspect.
+  * @param i  Out: class of the byte. 0 means continuation byte, 1..4 means
+  *           lead byte of a sequence of that many bytes, and
+  *           sizeof(__utf_mask) means the byte matches no known pattern.
+  *           Must not be NULL.
+  * @return   The payload bits of the byte (marker bits cleared), or 0 when
+  *           the byte matches no pattern.
+  */
+ uint_least32_t utf8_decode_byte(char c, size_t *i)
+ {
+     /*
+      * Plain char may be signed: work on an unsigned copy so that bytes
+      * >= 0x80 are not sign-extended into the upper bits of the result.
+      */
+     const uint8_t   byte = (uint8_t)c;
+ 
+     assert(i != NULL);
+ 
+     /* The tables hold uint8_t entries, so sizeof() is the entry count */
+     for (*i = 0; *i < sizeof(__utf_mask); (*i)++)
+     {
+         if ((byte & __utf_mask[*i]) == __utf_byte[*i])
+             return (uint8_t)(byte & ~__utf_mask[*i]);
+     }
+ 
+     return 0;
+ }
+ 
+ /**
+  * Decode the first UTF-8 sequence found in a byte buffer.
+  *
+  * @param c     Buffer to read from.
+  * @param u     Out: decoded code point, or UTF_INVALID when nothing valid
+  *              could be decoded.
+  * @param clen  Number of readable bytes in c.
+  * @return      Number of bytes consumed. 0 means the buffer is empty or
+  *              ends before the sequence is complete; 1 is also returned
+  *              when the first byte cannot start a sequence.
+  */
+ size_t utf8_decode(const char *c, uint_least32_t *u, size_t clen)
+ {
+     size_t              seq_len;
+     size_t              pos;
+     uint_least32_t      code_point;
+ 
+     /* Pessimistic default, overwritten only on full success */
+     *u = UTF_INVALID;
+     if (!clen)
+         return 0;
+ 
+     /* The lead byte tells how many bytes the sequence spans */
+     code_point = utf8_decode_byte(c[0], &seq_len);
+     if (!BETWEEN(seq_len, 1, UTF_SIZ))
+         return 1;
+ 
+     /* Append 6 payload bits for each continuation byte available */
+     for (pos = 1; pos < clen && pos < seq_len; pos++)
+     {
+         size_t  byte_class;
+ 
+         code_point = (code_point << 6) | utf8_decode_byte(c[pos], &byte_class);
+ 
+         /* Anything but a continuation byte ends the sequence early */
+         if (byte_class != 0)
+             return pos;
+     }
+ 
+     /* Buffer ran out before the sequence did: ask the caller for more */
+     if (pos < seq_len)
+         return 0;
+ 
+     *u = code_point;
+     utf8_validate(u, seq_len);
+     return seq_len;
+ }
+ 
+ /**
+  * Check that a decoded code point is legal for the sequence length it was
+  * decoded from, replacing it with UTF_INVALID otherwise.
+  *
+  * A code point is rejected when it lies outside the range allowed for a
+  * sequence of i bytes (overlong or too large), when it is a UTF-16
+  * surrogate (0xD800..0xDFFF), or when i is not a valid table index.
+  *
+  * @param u  In/out: code point to check; set to UTF_INVALID on rejection.
+  * @param i  Length in bytes of the sequence the code point came from.
+  * @return   Smallest sequence length (1..UTF_SIZ) whose range can hold *u;
+  *           UTF_SIZ when *u exceeds every range.
+  */
+ size_t utf8_validate(uint_least32_t *u, size_t i)
+ {
+     /* i > UTF_SIZ would index past the end of the range tables */
+     if (i > UTF_SIZ
+         || !BETWEEN(*u, __utf_min[i], __utf_max[i])
+         || BETWEEN(*u, 0xD800, 0xDFFF))
+         *u = UTF_INVALID;
+ 
+     /*
+      * UTF_INVALID is larger than every __utf_max entry, so the search has
+      * to be bounded explicitly or it would read past the end of the table.
+      */
+     for (i = 1; i < UTF_SIZ && *u > __utf_max[i]; i++)
+         ;
+ 
+     return i;
+ }

A utf8.h => utf8.h +11 -0
@@ 0,0 1,11 @@
+ #ifndef UTF8_H
+ #define UTF8_H
+ 
+ #include <stdint.h>
+ #include <stdlib.h>
+ 
+ size_t utf8_decode(const char *c, uint_least32_t *u, size_t len);
+ size_t utf8_validate(uint_least32_t *u, size_t i);
+ uint_least32_t utf8_decode_byte(char c, size_t *i);
+ 
+ #endif /* UTF8_H */