Commit: 21b5b3ee81f2061fa0184037cfc9768c560613e2
Parent: a123a12fde70712546102479d03566c37d248d4e
Author: Randy Palamar
Date:   Sun,  7 Jul 2024 17:51:33 -0600
properly handle unicode and cursor wrapping
Diffstat:
| M | config.def.h |  |  | 2 | ++ | 
| M | main.c |  |  | 2 | +- | 
| M | os_unix.c |  |  | 6 | +----- | 
| M | terminal.c |  |  | 88 | +++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- | 
| M | util.c |  |  | 66 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
| M | util.h |  |  | 12 | ++++++------ | 
| M | vtgl.c |  |  | 10 | +++++----- | 
7 files changed, 140 insertions(+), 46 deletions(-)
diff --git a/config.def.h b/config.def.h
@@ -1,6 +1,8 @@
 /* See LICENSE for copyright details */
 static s8 g_default_title = s8("vtgl");
 
+static u32 g_default_fontsize = 36; /* font size handed to init_fonts() from main() */
+
 static u8 g_tabstop = 8;
 
 static Colour base16_colours[16] = {
diff --git a/main.c b/main.c
@@ -286,7 +286,7 @@ main(void)
 		"/usr/share/fonts/gofont/Go-Mono.ttf",
 	};
 	init_window(&term, memory);
-	init_fonts(&term, font_paths, ARRAY_COUNT(font_paths), 48, &memory);
+	init_fonts(&term, font_paths, ARRAY_COUNT(font_paths), g_default_fontsize, &memory);
 
 	cursor_reset(&term);
 
diff --git a/os_unix.c b/os_unix.c
@@ -281,11 +281,7 @@ os_child_put_s8(os_child c, s8 text)
 static void
 os_child_put_char(os_child c, u32 cp)
 {
-	/* TODO: encode to utf-8 */
-	ASSERT(cp <= 0x7f);
-	u8 character = (u8)cp;
-	s8 text = {.len = 1, .data = &character};
-	os_child_put_s8(c, text);
+	os_child_put_s8(c, utf8_encode(cp)); /* encode cp as UTF-8 and forward the bytes to the child */
 }
 
 static void
diff --git a/terminal.c b/terminal.c
@@ -56,6 +56,20 @@ peek(s8 raw, size i)
 }
 
 static u32
+get_utf8(s8 *raw)
+{
+	/* Decodes one UTF-8 code point from the front of raw and consumes it.
+	 *
+	 * Malformed input yields U+FFFD and always consumes at least one byte,
+	 * so callers looping on raw->len are guaranteed to make progress: an
+	 * invalid lead byte is dropped on its own, while a sequence interrupted
+	 * by an unexpected byte is dropped up to (but not including) that byte
+	 * so it gets decoded on the next call (it may be ESC or a newline).
+	 *
+	 * Returns (u32)-1 and leaves raw untouched when raw is empty or ends in
+	 * the middle of a sequence. */
+	enum { DFA_ACCEPT = 0, DFA_REJECT = 12 }; /* states of the utf8d table */
+	u32 state = DFA_ACCEPT, cp = 0;
+	for (size off = 0; off < raw->len; ) {
+		utf8decode(&state, &cp, raw->data[off++]);
+		if (state == DFA_ACCEPT) {
+			*raw = consume(*raw, off);
+			return cp;
+		}
+		if (state == DFA_REJECT) {
+			/* the reject state is absorbing; bail out now instead of
+			 * scanning (and failing on) the remainder of the input */
+			*raw = consume(*raw, off > 1 ? off - 1 : 1);
+			return 0xFFFD;
+		}
+	}
+	return (u32)-1;
+}
+
+static u32
 get_ascii(s8 *raw)
 {
 	ASSERT(raw->len > 0);
@@ -441,6 +455,7 @@ handle_csi(Term *t, s8 *raw)
 
 	u8 next;
 	switch (csi.mode) {
+	case 'G': cursor_move_to(t, t->cursor.row, csi.argv[0] - 1);   break;
 	case 'H': cursor_move_to(t, csi.argv[0] - 1, csi.argv[1] - 1); break;
 	case 'J': erase_in_display(t, &csi);                           break;
 	case 'K': erase_in_line(t, &csi);                              break;
@@ -619,6 +634,8 @@ check_if_escape_moves_cursor(Term *t, s8 *raw)
 {
 	enum escape_moves_cursor_result result = EMC_NORMAL_RETURN;
 	u32 cp = get_ascii(raw);
+	if (raw->len == 0)
+		return EMC_NEEDS_MORE_BYTES;
 	switch(cp) {
 	case '[':
 		result = check_if_csi_moves_cursor(t, raw);
@@ -654,7 +671,7 @@ split_raw_input_to_lines(Term *t, s8 raw)
 	__m128i esc = _mm_set1_epi8(0x1B);
 	__m128i uni = _mm_set1_epi8(0x80);
 
-	#define SPLIT_LONG 4096
+	#define SPLIT_LONG 4096L
 	while (raw.len) {
 		__m128i hasutf8 = _mm_setzero_si128();
 		size count = raw.len > SPLIT_LONG ? SPLIT_LONG : raw.len;
@@ -684,26 +701,28 @@ split_raw_input_to_lines(Term *t, s8 raw)
 		lb->buf[lb->widx].has_unicode |= _mm_movemask_epi8(hasutf8);
 		raw = consume(raw, data - raw.data);
 
-		if (peek(raw, 0) == 0x1B) {
-			s8 old = raw;
-			raw = consume(raw, 1);
-			switch (check_if_escape_moves_cursor(t, &raw)) {
-			case EMC_NEEDS_MORE_BYTES:
-				t->unprocessed_bytes = old.len;
-				return parsed_lines;
-			case EMC_CURSOR_MOVED:
-				parsed_lines++;
-				feed_line(lb, old.data, t->cursor.state);
-				break;
-			default: break;
-			}
-		} else {
-			u32 c = get_ascii(&raw);
-			if (c == '\n') {
-				parsed_lines++;
-				feed_line(lb, raw.data, t->cursor.state);
-			} else if (c & 0x80) {
-				lb->buf[lb->widx].has_unicode = 1;
+		if (raw.len) {
+			if (peek(raw, 0) == 0x1B) {
+				s8 old = raw;
+				raw = consume(raw, 1);
+				switch (check_if_escape_moves_cursor(t, &raw)) {
+				case EMC_NEEDS_MORE_BYTES:
+					t->unprocessed_bytes = old.len;
+					return parsed_lines;
+				case EMC_CURSOR_MOVED:
+					parsed_lines++;
+					feed_line(lb, old.data, t->cursor.state);
+					break;
+				default: break;
+				}
+			} else {
+				u32 c = get_ascii(&raw);
+				if (c == '\n') {
+					parsed_lines++;
+					feed_line(lb, raw.data, t->cursor.state);
+				} else if (c & 0x80) {
+					lb->buf[lb->widx].has_unicode = 1;
+				}
 			}
 		}
 
@@ -754,9 +773,15 @@ push_line(Term *t, Line *line, Arena a)
 	t->cursor.state = line->cursor_state;
 
 	Cell *c;
+	b32 wrap_next = 0;
 	while (l.len) {
-		/* TODO: handle unicode case */
-		u32 cp = get_ascii(&l);
+		u32 cp;
+		if (line->has_unicode) cp = get_utf8(&l);
+		else                   cp = get_ascii(&l);
+
+		/* TODO: handle error case */
+		ASSERT(cp != (u32)-1);
+
 		switch (cp) {
 		case 0x1B: handle_escape(t, &l, a); break;
 		case '\r': t->cursor.col = 0;       break;
@@ -766,15 +791,22 @@ push_line(Term *t, Line *line, Arena a)
 			cursor_move_to(t, t->cursor.row, t->cursor.col - 1);
 			break;
 		default:
+			if (wrap_next)
+				cursor_step_column(t, 1);
+
 			/* TODO properly make sure characters are printable */
 			CLAMP(cp, ' ', '~');
 			c = &t->fb.rows[t->cursor.row][t->cursor.col];
 			c->cp    = cp;
 			c->style = t->cursor.state;
-			/* TODO: properly advance cursor */
-			cursor_step_column(t, 1);
+
+			wrap_next = t->cursor.col + 1 == t->size.w;
+			if (!wrap_next)
+				cursor_step_column(t, 1);
 		}
 	}
+	if (wrap_next && (t->cursor.row != t->size.h - 1))
+		cursor_step_column(t, 1);
 }
 
 static size
@@ -794,12 +826,10 @@ blit_lines(Term *t, Arena a)
 	if (line_count > t->log_lines.filled)
 		line_count = t->log_lines.filled;
 
+	/* NOTE: for now we assume that we blit the whole screen everytime */
 	/* TODO: Performance!!! */
-	fb_clear_region(t, 0, t->size.h, 0, t->size.w);
+	term_reset(t);
 
-	/* NOTE: for now we assume that we blit the whole screen everytime */
-	t->cursor.row = 0;
-	t->cursor.col = 0;
 	for (size i = 0; i <= line_count; i++) {
 		size line_idx = get_line_idx(&t->log_lines, -line_count + i);
 		push_line(t, t->log_lines.buf + line_idx, a);
diff --git a/util.c b/util.c
@@ -53,3 +53,69 @@ s8_to_cstr(Arena *a, s8 s)
 	cstr[s.len] = 0;
 	return cstr;
 }
+
+/* Encodes cp as UTF-8 and returns the 1-4 resulting bytes.
+ *
+ * Values that are not Unicode scalar values (UTF-16 surrogates and anything
+ * above U+10FFFF) are encoded as U+FFFD REPLACEMENT CHARACTER, so the
+ * returned length is always positive and safe to pass on to a write.
+ *
+ * NOTE: the returned s8 points at a static buffer; it is overwritten by the
+ * next call, so use or copy the bytes before encoding again. */
+static s8
+utf8_encode(u32 cp)
+{
+	static u8 buf[4];
+	s8 ret = { .data = buf, .len = 0 };
+
+	if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
+		cp = 0xFFFD;
+
+	if (cp < 0x80) {
+		ret.len = 1;
+		buf[0] = cp & 0x7F;
+	} else if (cp < 0x800) {
+		ret.len = 2;
+		buf[0] = ((cp >>  6) & 0x1F) | 0xC0;
+		buf[1] = ((cp >>  0) & 0x3F) | 0x80;
+	} else if (cp < 0x10000) {
+		ret.len = 3;
+		buf[0] = ((cp >> 12) & 0x0F) | 0xE0;
+		buf[1] = ((cp >>  6) & 0x3F) | 0x80;
+		buf[2] = ((cp >>  0) & 0x3F) | 0x80;
+	} else {
+		ret.len = 4;
+		buf[0] = ((cp >> 18) & 0x07) | 0xF0;
+		buf[1] = ((cp >> 12) & 0x3F) | 0x80;
+		buf[2] = ((cp >>  6) & 0x3F) | 0x80;
+		buf[3] = ((cp >>  0) & 0x3F) | 0x80;
+	}
+	return ret;
+}
+
+/* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
+static const u8 utf8d[] = {
+	/* The first part of the table maps bytes to character classes to
+	 * reduce the size of the transition table and create bitmasks. */
+	 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+	 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+	 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+	 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+	10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+	/* The second part is a transition table that maps a combination
+	 * of a state of the automaton and a character class to a state. */
+	 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+	12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+	12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+	12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+	12,36,12,12,12,12,12,12,12,12,12,12,
+};
+
+/* NOTE: the states stored in utf8d are pre-multiplied by 12 (the number of
+ * character classes) so that utf8decode can index the transition table with
+ * a plain addition; the rejecting state is therefore 12, not 1. */
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 12
+
+/* Feeds a single byte to the UTF-8 decoding automaton.
+ *
+ * *state must be UTF8_ACCEPT before the first byte of a sequence; *cp
+ * accumulates the code point across calls. Returns the new state:
+ * UTF8_ACCEPT when *cp holds a complete code point, UTF8_REJECT when the
+ * input is not valid UTF-8 (the state then stays rejected until the caller
+ * resets it), and any other value when more bytes are required. */
+static u32
+utf8decode(u32 *state, u32 *cp, u8 byte)
+{
+	u8 type = utf8d[byte];
+
+	/* continuation bytes contribute their low 6 bits; a lead byte is
+	 * masked according to its class to strip the length marker bits */
+	*cp = (*state != UTF8_ACCEPT) ?
+	         (byte & 0x3fu) | (*cp << 6) :
+	         (0xff >> type) & (byte);
+
+	*state = utf8d[256 + *state + type];
+	return *state;
+}
diff --git a/util.h b/util.h
@@ -75,7 +75,7 @@ typedef struct { u8 *beg, *end; } Arena;
 typedef struct { size len; u8 *data; } s8;
 #define s8(s) (s8){.len = ARRAY_COUNT(s) - 1, .data = (u8 *)s}
 
-enum CellAttr {
+enum cell_attribute {
 	ATTR_NULL       = 0,
 	ATTR_BOLD       = 1 << 0,
 	ATTR_ITALIC     = 1 << 1,
@@ -97,6 +97,11 @@ typedef struct {
 	CellStyle style;
 } Cell;
 
+typedef struct { /* terminal cursor: where the next cell is written and with which style */
+	u32       row, col; /* cell position; used as fb.rows[row][col] when storing a cell */
+	CellStyle state;    /* style copied into each cell written at the cursor */
+} Cursor;
+
 typedef Cell *Row;
 
 typedef struct {
@@ -109,11 +114,6 @@ typedef struct {
 	size rows_alloc_size;
 } Framebuffer;
 
-typedef struct {
-	u32 row, col;
-	CellStyle state;
-} Cursor;
-
 /* NOTE: virtual memory ring buffer */
 typedef struct {
 	size  cap;
diff --git a/vtgl.c b/vtgl.c
@@ -52,7 +52,6 @@ set_projection_matrix(GLCtx *gl)
 	glUniformMatrix4fv(gl->post.Pmat, 1, GL_TRUE, pmat);
 }
 
-
 static void
 resize(Term *t)
 {
@@ -217,7 +216,9 @@ push_cell(RenderPushBuffer *rpb, GLCtx *gl, Cell c, Rect r, f32 font_text_dy)
 	u32 idx = get_render_push_buffer_idx(rpb, gl, 2);
 
 	Glyph g;
-	i32 depth_idx = get_gpu_glyph_index(gl, c.cp, &g);
+	/* TODO: is defaulting to space correct? */
+	u32 cp = c.cp? c.cp : ' ';
+	i32 depth_idx = get_gpu_glyph_index(gl, cp, &g);
 
 	rpb->vertscales[idx + 0] = r.size;
 	rpb->vertscales[idx + 1] = (v2){.x = g.size.w, .y = g.size.h};
@@ -390,17 +391,15 @@ do_terminal(Term *t, Arena a)
 		}
 		t->unprocessed_bytes += os_read_from_child(t->child, &t->log, t->unprocessed_bytes);
 		s8 raw = {
-			.len = t->unprocessed_bytes,
+			.len  = t->unprocessed_bytes,
 			.data = t->log.buf + (t->log.widx - t->unprocessed_bytes)
 		};
 		size parsed_lines = split_raw_input_to_lines(t, raw);
 		/* TODO: think about only blitting update lines? */
 		(void)parsed_lines;
 	}
-
 	blit_lines(t, a);
 
-	v2 ws = t->gl.window_size;
 	/* NOTE: reset the camera/viewport */
 	glUseProgram(t->gl.programs[SHADER_RENDER]);
 	glUniform1i(t->gl.render.texslot, 0);
@@ -409,6 +408,7 @@ do_terminal(Term *t, Arena a)
 	clear_colour();
 	render_framebuffer(t, rpb);
 
+	v2 ws = t->gl.window_size;
 	v2 cell_size  = get_cell_size(t);
 	v2 cursor_pos = {
 		.x = t->cursor.col * cell_size.w,