Commit: 21b5b3ee81f2061fa0184037cfc9768c560613e2
Parent: a123a12fde70712546102479d03566c37d248d4e
Author: Randy Palamar
Date: Sun, 7 Jul 2024 17:51:33 -0600
properly handle unicode and cursor wrapping
Diffstat:
M | config.def.h | | | 2 | ++ |
M | main.c | | | 2 | +- |
M | os_unix.c | | | 6 | +----- |
M | terminal.c | | | 88 | +++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- |
M | util.c | | | 66 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | util.h | | | 12 | ++++++------ |
M | vtgl.c | | | 10 | +++++----- |
7 files changed, 140 insertions(+), 46 deletions(-)
diff --git a/config.def.h b/config.def.h
@@ -1,6 +1,8 @@
/* See LICENSE for copyright details */
static s8 g_default_title = s8("vtgl");
+static u32 g_default_fontsize = 36; /* size main() hands to init_fonts(); replaces the literal 48 used there before */
+
static u8 g_tabstop = 8;
static Colour base16_colours[16] = {
diff --git a/main.c b/main.c
@@ -286,7 +286,7 @@ main(void)
"/usr/share/fonts/gofont/Go-Mono.ttf",
};
init_window(&term, memory);
- init_fonts(&term, font_paths, ARRAY_COUNT(font_paths), 48, &memory);
+ init_fonts(&term, font_paths, ARRAY_COUNT(font_paths), g_default_fontsize, &memory);
cursor_reset(&term);
diff --git a/os_unix.c b/os_unix.c
@@ -281,11 +281,7 @@ os_child_put_s8(os_child c, s8 text)
static void
os_child_put_char(os_child c, u32 cp)
{
- /* TODO: encode to utf-8 */
- ASSERT(cp <= 0x7f);
- u8 character = (u8)cp;
- s8 text = {.len = 1, .data = &character};
- os_child_put_s8(c, text);
+	/* Send the code point cp to the child encoded as UTF-8.
+	 * utf8_encode() reports a code point it cannot encode with len == -1;
+	 * such input is dropped here rather than forwarding a negative length
+	 * to os_child_put_s8(). */
+	s8 text = utf8_encode(cp);
+	if (text.len > 0)
+		os_child_put_s8(c, text);
}
static void
diff --git a/terminal.c b/terminal.c
@@ -56,6 +56,20 @@ peek(s8 raw, size i)
}
static u32
+get_utf8(s8 *raw)
+{
+	/* Decode one UTF-8 sequence from the front of raw.
+	 *
+	 * - complete sequence:  its bytes are consumed and the code point returned
+	 * - malformed sequence: the bad bytes are consumed and U+FFFD is returned,
+	 *   so a caller looping on raw->len always makes progress
+	 * - raw is empty or ends in the middle of a sequence: nothing is
+	 *   consumed and (u32)-1 is returned
+	 *
+	 * DFA_REJECT is the absorbing error state of the utf8decode() table:
+	 * every entry of the row at offset 12 is 12 again, so without an
+	 * explicit check the loop would scan the rest of the buffer. */
+	enum { DFA_ACCEPT = 0, DFA_REJECT = 12 };
+
+	u32 state = DFA_ACCEPT, cp = 0;
+	size off = 0;
+	while (off < raw->len) {
+		u32 next = utf8decode(&state, &cp, raw->data[off++]);
+		if (next == DFA_ACCEPT) {
+			*raw = consume(*raw, off);
+			return cp;
+		}
+		if (next == DFA_REJECT) {
+			/* NOTE: when a later byte broke the sequence leave that
+			 * byte in place; it may start a valid sequence itself.
+			 * A bad lead byte is skipped on its own. */
+			*raw = consume(*raw, off > 1 ? off - 1 : 1);
+			return 0xFFFD;
+		}
+	}
+	return (u32)-1;
+}
+
+static u32
get_ascii(s8 *raw)
{
ASSERT(raw->len > 0);
@@ -441,6 +455,7 @@ handle_csi(Term *t, s8 *raw)
u8 next;
switch (csi.mode) {
+ case 'G': cursor_move_to(t, t->cursor.row, csi.argv[0] - 1); break;
case 'H': cursor_move_to(t, csi.argv[0] - 1, csi.argv[1] - 1); break;
case 'J': erase_in_display(t, &csi); break;
case 'K': erase_in_line(t, &csi); break;
@@ -619,6 +634,8 @@ check_if_escape_moves_cursor(Term *t, s8 *raw)
{
enum escape_moves_cursor_result result = EMC_NORMAL_RETURN;
u32 cp = get_ascii(raw);
+ if (raw->len == 0)
+ return EMC_NEEDS_MORE_BYTES;
switch(cp) {
case '[':
result = check_if_csi_moves_cursor(t, raw);
@@ -654,7 +671,7 @@ split_raw_input_to_lines(Term *t, s8 raw)
__m128i esc = _mm_set1_epi8(0x1B);
__m128i uni = _mm_set1_epi8(0x80);
- #define SPLIT_LONG 4096
+ #define SPLIT_LONG 4096L
while (raw.len) {
__m128i hasutf8 = _mm_setzero_si128();
size count = raw.len > SPLIT_LONG ? SPLIT_LONG : raw.len;
@@ -684,26 +701,28 @@ split_raw_input_to_lines(Term *t, s8 raw)
lb->buf[lb->widx].has_unicode |= _mm_movemask_epi8(hasutf8);
raw = consume(raw, data - raw.data);
- if (peek(raw, 0) == 0x1B) {
- s8 old = raw;
- raw = consume(raw, 1);
- switch (check_if_escape_moves_cursor(t, &raw)) {
- case EMC_NEEDS_MORE_BYTES:
- t->unprocessed_bytes = old.len;
- return parsed_lines;
- case EMC_CURSOR_MOVED:
- parsed_lines++;
- feed_line(lb, old.data, t->cursor.state);
- break;
- default: break;
- }
- } else {
- u32 c = get_ascii(&raw);
- if (c == '\n') {
- parsed_lines++;
- feed_line(lb, raw.data, t->cursor.state);
- } else if (c & 0x80) {
- lb->buf[lb->widx].has_unicode = 1;
+ if (raw.len) {
+ if (peek(raw, 0) == 0x1B) {
+ s8 old = raw;
+ raw = consume(raw, 1);
+ switch (check_if_escape_moves_cursor(t, &raw)) {
+ case EMC_NEEDS_MORE_BYTES:
+ t->unprocessed_bytes = old.len;
+ return parsed_lines;
+ case EMC_CURSOR_MOVED:
+ parsed_lines++;
+ feed_line(lb, old.data, t->cursor.state);
+ break;
+ default: break;
+ }
+ } else {
+ u32 c = get_ascii(&raw);
+ if (c == '\n') {
+ parsed_lines++;
+ feed_line(lb, raw.data, t->cursor.state);
+ } else if (c & 0x80) {
+ lb->buf[lb->widx].has_unicode = 1;
+ }
}
}
@@ -754,9 +773,15 @@ push_line(Term *t, Line *line, Arena a)
t->cursor.state = line->cursor_state;
Cell *c;
+ b32 wrap_next = 0;
while (l.len) {
- /* TODO: handle unicode case */
- u32 cp = get_ascii(&l);
+ u32 cp;
+ if (line->has_unicode) cp = get_utf8(&l);
+ else cp = get_ascii(&l);
+
+ /* TODO: handle error case */
+ ASSERT(cp != (u32)-1);
+
switch (cp) {
case 0x1B: handle_escape(t, &l, a); break;
case '\r': t->cursor.col = 0; break;
@@ -766,15 +791,22 @@ push_line(Term *t, Line *line, Arena a)
cursor_move_to(t, t->cursor.row, t->cursor.col - 1);
break;
default:
+ if (wrap_next)
+ cursor_step_column(t, 1);
+
/* TODO properly make sure characters are printable */
CLAMP(cp, ' ', '~');
c = &t->fb.rows[t->cursor.row][t->cursor.col];
c->cp = cp;
c->style = t->cursor.state;
- /* TODO: properly advance cursor */
- cursor_step_column(t, 1);
+
+ wrap_next = t->cursor.col + 1 == t->size.w;
+ if (!wrap_next)
+ cursor_step_column(t, 1);
}
}
+ if (wrap_next && (t->cursor.row != t->size.h - 1))
+ cursor_step_column(t, 1);
}
static size
@@ -794,12 +826,10 @@ blit_lines(Term *t, Arena a)
if (line_count > t->log_lines.filled)
line_count = t->log_lines.filled;
+ /* NOTE: for now we assume that we blit the whole screen every time */
/* TODO: Performance!!! */
- fb_clear_region(t, 0, t->size.h, 0, t->size.w);
+ term_reset(t);
- /* NOTE: for now we assume that we blit the whole screen everytime */
- t->cursor.row = 0;
- t->cursor.col = 0;
for (size i = 0; i <= line_count; i++) {
size line_idx = get_line_idx(&t->log_lines, -line_count + i);
push_line(t, t->log_lines.buf + line_idx, a);
diff --git a/util.c b/util.c
@@ -53,3 +53,69 @@ s8_to_cstr(Arena *a, s8 s)
cstr[s.len] = 0;
return cstr;
}
+
+/* Encode the code point cp as UTF-8.
+ *
+ * The returned s8 points into a static 4 byte buffer: it is overwritten by
+ * the next call, so it must be used or copied before encoding again.
+ *
+ * Surrogates (U+D800..U+DFFF) and values above U+10FFFF have no valid UTF-8
+ * form (the decoder in this file rejects them as well); for those the
+ * result has len == -1 and the buffer contents are unspecified. */
+static s8
+utf8_encode(u32 cp)
+{
+	static u8 buf[4];
+	s8 ret = { .data = buf, .len = -1 };
+
+	if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
+		return ret;
+
+	if (cp < 0x80) {
+		ret.len = 1;
+		buf[0] = cp & 0x7F;
+	} else if (cp < 0x800) {
+		ret.len = 2;
+		buf[0] = ((cp >> 6) & 0x1F) | 0xC0;
+		buf[1] = ((cp >> 0) & 0x3F) | 0x80;
+	} else if (cp < 0x10000) {
+		ret.len = 3;
+		buf[0] = ((cp >> 12) & 0x0F) | 0xE0;
+		buf[1] = ((cp >> 6) & 0x3F) | 0x80;
+		buf[2] = ((cp >> 0) & 0x3F) | 0x80;
+	} else {
+		ret.len = 4;
+		buf[0] = ((cp >> 18) & 0x07) | 0xF0;
+		buf[1] = ((cp >> 12) & 0x3F) | 0x80;
+		buf[2] = ((cp >> 6) & 0x3F) | 0x80;
+		buf[3] = ((cp >> 0) & 0x3F) | 0x80;
+	}
+	return ret;
+}
+
+/* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
+static const u8 utf8d[] = {
+ /* The first part of the table (256 entries, indexed by byte value)
+ * maps bytes to character classes 0..11, which reduces the size of
+ * the transition table and is used to create bitmasks. */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+ /* The second part is a transition table that maps a combination
+ * of a state of the automaton and a character class to a state.
+ * Each row holds 12 entries (one per character class) and a state is
+ * stored as the offset of its row (0, 12, 24, ...), so a lookup is
+ * utf8d[256 + state + class]. Row 0 is the accepting state; every
+ * entry of the row at offset 12 is 12 again, i.e. the reject state
+ * is never left. */
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+ 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+ 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+ 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+ 12,36,12,12,12,12,12,12,12,12,12,12,
+};
+
+/* NOTE: utf8d stores states as row offsets into its transition table
+ * (0, 12, 24, ...) and utf8decode() indexes it with 256 + state + class,
+ * so the reject state is 12. The value 1 belongs to the other published
+ * variant of the decoder (indexed with state * 16) and is never produced
+ * by this table. */
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 12
+
+/* Feed one byte to the UTF-8 decoding automaton.
+ *
+ * state: automaton state carried between calls; start with UTF8_ACCEPT
+ * cp:    code point being accumulated; it is overwritten (not read) when
+ *        *state is UTF8_ACCEPT on entry, so it needs no initialisation
+ * byte:  next input byte
+ *
+ * Returns the new *state: UTF8_ACCEPT when *cp holds a complete code
+ * point, UTF8_REJECT when the input is not valid UTF-8 (the automaton
+ * stays in that state until the caller resets *state), and any other
+ * value when more bytes are needed. */
+static u32
+utf8decode(u32 *state, u32 *cp, u8 byte)
+{
+	u8 type = utf8d[byte];
+
+	/* a continuation byte contributes its low 6 bits; a lead byte is
+	 * masked according to its character class */
+	*cp = (*state != UTF8_ACCEPT) ?
+		(byte & 0x3fu) | (*cp << 6) :
+		(0xff >> type) & (byte);
+
+	*state = utf8d[256 + *state + type];
+	return *state;
+}
diff --git a/util.h b/util.h
@@ -75,7 +75,7 @@ typedef struct { u8 *beg, *end; } Arena;
/* Counted byte string: len bytes starting at data */
typedef struct { size len; u8 *data; } s8;
/* Wrap a string literal as an s8; the - 1 leaves the literal's trailing NUL
 * out of len. The argument is parenthesised so the cast applies to the
 * whole expression passed in. */
#define s8(s) (s8){.len = ARRAY_COUNT(s) - 1, .data = (u8 *)(s)}
-enum CellAttr {
+enum cell_attribute {
ATTR_NULL = 0,
ATTR_BOLD = 1 << 0,
ATTR_ITALIC = 1 << 1,
@@ -97,6 +97,11 @@ typedef struct {
CellStyle style;
} Cell;
+/* Cursor: the cell position being written (row/col index the framebuffer
+ * rows) and the style copied into each cell that is written there */
+typedef struct {
+	u32       row;
+	u32       col;
+	CellStyle state;
+} Cursor;
+
typedef Cell *Row;
typedef struct {
@@ -109,11 +114,6 @@ typedef struct {
size rows_alloc_size;
} Framebuffer;
-typedef struct {
- u32 row, col;
- CellStyle state;
-} Cursor;
-
/* NOTE: virtual memory ring buffer */
typedef struct {
size cap;
diff --git a/vtgl.c b/vtgl.c
@@ -52,7 +52,6 @@ set_projection_matrix(GLCtx *gl)
glUniformMatrix4fv(gl->post.Pmat, 1, GL_TRUE, pmat);
}
-
static void
resize(Term *t)
{
@@ -217,7 +216,9 @@ push_cell(RenderPushBuffer *rpb, GLCtx *gl, Cell c, Rect r, f32 font_text_dy)
u32 idx = get_render_push_buffer_idx(rpb, gl, 2);
Glyph g;
- i32 depth_idx = get_gpu_glyph_index(gl, c.cp, &g);
+ /* TODO: is defaulting to space correct? */
+ u32 cp = c.cp? c.cp : ' ';
+ i32 depth_idx = get_gpu_glyph_index(gl, cp, &g);
rpb->vertscales[idx + 0] = r.size;
rpb->vertscales[idx + 1] = (v2){.x = g.size.w, .y = g.size.h};
@@ -390,17 +391,15 @@ do_terminal(Term *t, Arena a)
}
t->unprocessed_bytes += os_read_from_child(t->child, &t->log, t->unprocessed_bytes);
s8 raw = {
- .len = t->unprocessed_bytes,
+ .len = t->unprocessed_bytes,
.data = t->log.buf + (t->log.widx - t->unprocessed_bytes)
};
size parsed_lines = split_raw_input_to_lines(t, raw);
/* TODO: think about only blitting update lines? */
(void)parsed_lines;
}
-
blit_lines(t, a);
- v2 ws = t->gl.window_size;
/* NOTE: reset the camera/viewport */
glUseProgram(t->gl.programs[SHADER_RENDER]);
glUniform1i(t->gl.render.texslot, 0);
@@ -409,6 +408,7 @@ do_terminal(Term *t, Arena a)
clear_colour();
render_framebuffer(t, rpb);
+ v2 ws = t->gl.window_size;
v2 cell_size = get_cell_size(t);
v2 cursor_pos = {
.x = t->cursor.col * cell_size.w,