vtgl

terminal emulator implemented in OpenGL
git clone anongit@rnpnr.xyz:vtgl.git
Log | Files | Refs | Feed | LICENSE

Commit: 21b5b3ee81f2061fa0184037cfc9768c560613e2
Parent: a123a12fde70712546102479d03566c37d248d4e
Author: Randy Palamar
Date:   Sun,  7 Jul 2024 17:51:33 -0600

properly handle unicode and cursor wrapping

Diffstat:
M config.def.h | 2 ++
M main.c | 2 +-
M os_unix.c | 6 +-----
M terminal.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M util.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M util.h | 12 ++++++------
M vtgl.c | 10 +++++-----
7 files changed, 140 insertions(+), 46 deletions(-)

diff --git a/config.def.h b/config.def.h @@ -1,6 +1,8 @@ /* See LICENSE for copyright details */ static s8 g_default_title = s8("vtgl"); +static u32 g_default_fontsize = 36; + static u8 g_tabstop = 8; static Colour base16_colours[16] = { diff --git a/main.c b/main.c @@ -286,7 +286,7 @@ main(void) "/usr/share/fonts/gofont/Go-Mono.ttf", }; init_window(&term, memory); - init_fonts(&term, font_paths, ARRAY_COUNT(font_paths), 48, &memory); + init_fonts(&term, font_paths, ARRAY_COUNT(font_paths), g_default_fontsize, &memory); cursor_reset(&term); diff --git a/os_unix.c b/os_unix.c @@ -281,11 +281,7 @@ os_child_put_s8(os_child c, s8 text) static void os_child_put_char(os_child c, u32 cp) { - /* TODO: encode to utf-8 */ - ASSERT(cp <= 0x7f); - u8 character = (u8)cp; - s8 text = {.len = 1, .data = &character}; - os_child_put_s8(c, text); + os_child_put_s8(c, utf8_encode(cp)); } static void diff --git a/terminal.c b/terminal.c @@ -56,6 +56,20 @@ peek(s8 raw, size i) } static u32 +get_utf8(s8 *raw) +{ + u32 state = 0, cp; + size off = 0; + while (off < raw->len) { + if (!utf8decode(&state, &cp, raw->data[off++])) { + *raw = consume(*raw, off); + return cp; + } + } + return (u32)-1; +} + +static u32 get_ascii(s8 *raw) { ASSERT(raw->len > 0); @@ -441,6 +455,7 @@ handle_csi(Term *t, s8 *raw) u8 next; switch (csi.mode) { + case 'G': cursor_move_to(t, t->cursor.row, csi.argv[0] - 1); break; case 'H': cursor_move_to(t, csi.argv[0] - 1, csi.argv[1] - 1); break; case 'J': erase_in_display(t, &csi); break; case 'K': erase_in_line(t, &csi); break; @@ -619,6 +634,8 @@ check_if_escape_moves_cursor(Term *t, s8 *raw) { enum escape_moves_cursor_result result = EMC_NORMAL_RETURN; u32 cp = get_ascii(raw); + if (raw->len == 0) + return EMC_NEEDS_MORE_BYTES; switch(cp) { case '[': result = check_if_csi_moves_cursor(t, raw); @@ -654,7 +671,7 @@ split_raw_input_to_lines(Term *t, s8 raw) __m128i esc = _mm_set1_epi8(0x1B); __m128i uni = _mm_set1_epi8(0x80); - #define SPLIT_LONG 4096 + #define 
SPLIT_LONG 4096L while (raw.len) { __m128i hasutf8 = _mm_setzero_si128(); size count = raw.len > SPLIT_LONG ? SPLIT_LONG : raw.len; @@ -684,26 +701,28 @@ split_raw_input_to_lines(Term *t, s8 raw) lb->buf[lb->widx].has_unicode |= _mm_movemask_epi8(hasutf8); raw = consume(raw, data - raw.data); - if (peek(raw, 0) == 0x1B) { - s8 old = raw; - raw = consume(raw, 1); - switch (check_if_escape_moves_cursor(t, &raw)) { - case EMC_NEEDS_MORE_BYTES: - t->unprocessed_bytes = old.len; - return parsed_lines; - case EMC_CURSOR_MOVED: - parsed_lines++; - feed_line(lb, old.data, t->cursor.state); - break; - default: break; - } - } else { - u32 c = get_ascii(&raw); - if (c == '\n') { - parsed_lines++; - feed_line(lb, raw.data, t->cursor.state); - } else if (c & 0x80) { - lb->buf[lb->widx].has_unicode = 1; + if (raw.len) { + if (peek(raw, 0) == 0x1B) { + s8 old = raw; + raw = consume(raw, 1); + switch (check_if_escape_moves_cursor(t, &raw)) { + case EMC_NEEDS_MORE_BYTES: + t->unprocessed_bytes = old.len; + return parsed_lines; + case EMC_CURSOR_MOVED: + parsed_lines++; + feed_line(lb, old.data, t->cursor.state); + break; + default: break; + } + } else { + u32 c = get_ascii(&raw); + if (c == '\n') { + parsed_lines++; + feed_line(lb, raw.data, t->cursor.state); + } else if (c & 0x80) { + lb->buf[lb->widx].has_unicode = 1; + } } } @@ -754,9 +773,15 @@ push_line(Term *t, Line *line, Arena a) t->cursor.state = line->cursor_state; Cell *c; + b32 wrap_next = 0; while (l.len) { - /* TODO: handle unicode case */ - u32 cp = get_ascii(&l); + u32 cp; + if (line->has_unicode) cp = get_utf8(&l); + else cp = get_ascii(&l); + + /* TODO: handle error case */ + ASSERT(cp != (u32)-1); + switch (cp) { case 0x1B: handle_escape(t, &l, a); break; case '\r': t->cursor.col = 0; break; @@ -766,15 +791,22 @@ push_line(Term *t, Line *line, Arena a) cursor_move_to(t, t->cursor.row, t->cursor.col - 1); break; default: + if (wrap_next) + cursor_step_column(t, 1); + /* TODO properly make sure characters are 
printable */ CLAMP(cp, ' ', '~'); c = &t->fb.rows[t->cursor.row][t->cursor.col]; c->cp = cp; c->style = t->cursor.state; - /* TODO: properly advance cursor */ - cursor_step_column(t, 1); + + wrap_next = t->cursor.col + 1 == t->size.w; + if (!wrap_next) + cursor_step_column(t, 1); } } + if (wrap_next && (t->cursor.row != t->size.h - 1)) + cursor_step_column(t, 1); } static size @@ -794,12 +826,10 @@ blit_lines(Term *t, Arena a) if (line_count > t->log_lines.filled) line_count = t->log_lines.filled; + /* NOTE: for now we assume that we blit the whole screen everytime */ /* TODO: Performance!!! */ - fb_clear_region(t, 0, t->size.h, 0, t->size.w); + term_reset(t); - /* NOTE: for now we assume that we blit the whole screen everytime */ - t->cursor.row = 0; - t->cursor.col = 0; for (size i = 0; i <= line_count; i++) { size line_idx = get_line_idx(&t->log_lines, -line_count + i); push_line(t, t->log_lines.buf + line_idx, a); diff --git a/util.c b/util.c @@ -53,3 +53,69 @@ s8_to_cstr(Arena *a, s8 s) cstr[s.len] = 0; return cstr; } + +static s8 +utf8_encode(u32 cp) +{ + static u8 buf[4]; + s8 ret = { .data = buf, .len = -1 }; + if (cp < 0x80) { + ret.len = 1; + buf[0] = cp & 0x7F; + } else if (cp < 0x800) { + ret.len = 2; + buf[0] = ((cp >> 6) & 0x1F) | 0xC0; + buf[1] = ((cp >> 0) & 0x3F) | 0x80; + } else if (cp < 0x10000) { + ret.len = 3; + buf[0] = ((cp >> 12) & 0x0F) | 0xE0; + buf[1] = ((cp >> 6) & 0x3F) | 0x80; + buf[2] = ((cp >> 0) & 0x3F) | 0x80; + } else if (cp < 0x200000) { + ret.len = 4; + buf[0] = ((cp >> 18) & 0x07) | 0xF0; + buf[1] = ((cp >> 12) & 0x3F) | 0x80; + buf[2] = ((cp >> 6) & 0x3F) | 0x80; + buf[3] = ((cp >> 0) & 0x3F) | 0x80; + } + return ret; +} + +/* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> + * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */ +static const u8 utf8d[] = { + /* The first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks. 
*/ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + /* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state. */ + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, +}; + +#define UTF8_ACCEPT 0 +#define UTF8_REJECT 1 + +static u32 +utf8decode(u32 *state, u32 *cp, u8 byte) +{ + u8 type = utf8d[byte]; + + *cp = (*state != UTF8_ACCEPT) ? 
+ (byte & 0x3fu) | (*cp << 6) : + (0xff >> type) & (byte); + + *state = utf8d[256 + *state + type]; + return *state; +} diff --git a/util.h b/util.h @@ -75,7 +75,7 @@ typedef struct { u8 *beg, *end; } Arena; typedef struct { size len; u8 *data; } s8; #define s8(s) (s8){.len = ARRAY_COUNT(s) - 1, .data = (u8 *)s} -enum CellAttr { +enum cell_attribute { ATTR_NULL = 0, ATTR_BOLD = 1 << 0, ATTR_ITALIC = 1 << 1, @@ -97,6 +97,11 @@ typedef struct { CellStyle style; } Cell; +typedef struct { + u32 row, col; + CellStyle state; +} Cursor; + typedef Cell *Row; typedef struct { @@ -109,11 +114,6 @@ typedef struct { size rows_alloc_size; } Framebuffer; -typedef struct { - u32 row, col; - CellStyle state; -} Cursor; - /* NOTE: virtual memory ring buffer */ typedef struct { size cap; diff --git a/vtgl.c b/vtgl.c @@ -52,7 +52,6 @@ set_projection_matrix(GLCtx *gl) glUniformMatrix4fv(gl->post.Pmat, 1, GL_TRUE, pmat); } - static void resize(Term *t) { @@ -217,7 +216,9 @@ push_cell(RenderPushBuffer *rpb, GLCtx *gl, Cell c, Rect r, f32 font_text_dy) u32 idx = get_render_push_buffer_idx(rpb, gl, 2); Glyph g; - i32 depth_idx = get_gpu_glyph_index(gl, c.cp, &g); + /* TODO: is defaulting to space correct? */ + u32 cp = c.cp? c.cp : ' '; + i32 depth_idx = get_gpu_glyph_index(gl, cp, &g); rpb->vertscales[idx + 0] = r.size; rpb->vertscales[idx + 1] = (v2){.x = g.size.w, .y = g.size.h}; @@ -390,17 +391,15 @@ do_terminal(Term *t, Arena a) } t->unprocessed_bytes += os_read_from_child(t->child, &t->log, t->unprocessed_bytes); s8 raw = { - .len = t->unprocessed_bytes, + .len = t->unprocessed_bytes, .data = t->log.buf + (t->log.widx - t->unprocessed_bytes) }; size parsed_lines = split_raw_input_to_lines(t, raw); /* TODO: think about only blitting update lines? 
*/ (void)parsed_lines; } - blit_lines(t, a); - v2 ws = t->gl.window_size; /* NOTE: reset the camera/viewport */ glUseProgram(t->gl.programs[SHADER_RENDER]); glUniform1i(t->gl.render.texslot, 0); @@ -409,6 +408,7 @@ do_terminal(Term *t, Arena a) clear_colour(); render_framebuffer(t, rpb); + v2 ws = t->gl.window_size; v2 cell_size = get_cell_size(t); v2 cursor_pos = { .x = t->cursor.col * cell_size.w,