vtgl

terminal emulator implemented in OpenGL
git clone anongit@rnpnr.xyz:vtgl.git
Log | Files | Refs | Feed | LICENSE

Commit: 6f9a8436cf0cc98b4c2c944cc4c2124b69507270
Parent: 60fa5e65f199607d3edda83635fc75f7906201c7
Author: Randy Palamar
Date:   Wed, 23 Oct 2024 21:48:47 -0600

remove nasty selection check in fb_clear_region

vis likes to emit CSI sequences that call this, and it was quite expensive,
largely due to the stupid branching caused by checking the selection for
every cell. Checking beforehand with a proper inclusion check
saves about 50% of the cycles. It is still roughly 10x slower than
it should be, but this is because of the tricky pointer business.
It may need to be re-explored later.

Diffstat:
Mterminal.c | 37+++++++++++++++++++++++++++++--------
Mutil.h | 2+-
Mvtgl.c | 11+++++++++--
3 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/terminal.c b/terminal.c @@ -149,6 +149,21 @@ is_selected(Selection *s, i32 x, i32 y) return result; } +static b32 +selection_intersects_region(Selection *s, uv2 tl, uv2 br) +{ + /* TODO: maybe this can be further simplified (eg. with a k-map) */ + Range r = s->range; + b32 valid = is_valid_range(r); + b32 whole = r.start.y < tl.y && r.end.y > br.y; + b32 start_x = r.start.x >= tl.x && r.start.x <= br.x; + b32 start_y = r.start.y >= tl.y && r.start.y <= br.y; + b32 end_x = r.end.x >= tl.x && r.end.x <= br.x; + b32 end_y = r.end.y >= tl.y && r.end.y <= br.y; + b32 result = valid && (whole || (start_y && start_x) || (end_y && end_x)); + return result; +} + static void fb_clear_region(Term *t, u32 r1, u32 r2, u32 c1, u32 c2) { @@ -169,16 +184,20 @@ fb_clear_region(Term *t, u32 r1, u32 r2, u32 c1, u32 c2) CLAMP(r1, 0, t->size.h - 1); CLAMP(r2, 0, t->size.h - 1); - TermView *tv = t->views + t->view_idx; - for (u32 r = r1; r <= r2; r++) { - for (u32 c = c1; c <= c2; c++) { - tv->fb.rows[r][c].style = t->cursor.style; - tv->fb.rows[r][c].cp = ' '; - /* TODO: this shouldn't be in this loop; make a region intersection test */ - if (is_selected(&t->selection, c, r)) - selection_clear(&t->selection); + uv2 top_left = {.x = c1, .y = r1}; + uv2 bottom_right = {.x = c2, .y = r2}; + if (selection_intersects_region(&t->selection, top_left, bottom_right)) + selection_clear(&t->selection); + + Row *rows = t->views[t->view_idx].fb.rows; + CellStyle cursor_style = t->cursor.style; + for (u32 r = top_left.y; r <= bottom_right.y; r++) { + for (u32 c = top_left.x; c <= bottom_right.x; c++) { + rows[r][c].style = cursor_style; + rows[r][c].cp = ' '; } } + END_TIMED_BLOCK(); } @@ -1010,6 +1029,7 @@ validate_osc(Term *t, s8 *raw) static enum escape_moves_cursor_result check_if_csi_moves_cursor(Term *t, s8 *raw) { + BEGIN_TIMED_BLOCK(); enum escape_moves_cursor_result result = EMC_NORMAL_RETURN; CSI csi = {0}; if (parse_csi(raw, &csi) == -1) @@ -1039,6 +1059,7 @@ 
check_if_csi_moves_cursor(Term *t, s8 *raw) if (mode != (t->mode & TM_ALTSCREEN)) result = EMC_SWAPPED_SCREEN; + END_TIMED_BLOCK(); return result; } diff --git a/util.h b/util.h @@ -75,7 +75,7 @@ typedef union { f32 E[2]; } v2; -typedef union { +typedef __attribute__((aligned(16))) union { struct { f32 x, y, z, w; }; struct { f32 r, g, b, a; }; f32 E[4]; diff --git a/vtgl.c b/vtgl.c @@ -215,6 +215,8 @@ get_render_push_buffer_idx(RenderCtx *rc, u32 count) static void push_rect_full(RenderCtx *rc, Rect r, v4 colour, v2 min_tex_coord, v2 max_tex_coord) { + BEGIN_TIMED_BLOCK(); + u32 idx = get_render_push_buffer_idx(rc, 4); v2 start = r.pos; v2 end = {.x = r.pos.x + r.size.w, .y = r.pos.y + r.size.h}; @@ -234,6 +236,8 @@ push_rect_full(RenderCtx *rc, Rect r, v4 colour, v2 min_tex_coord, v2 max_tex_co rpb->colours[idx + 1] = colour; rpb->colours[idx + 2] = colour; rpb->colours[idx + 3] = colour; + + END_TIMED_BLOCK(); } static void @@ -261,6 +265,7 @@ push_rect(RenderCtx *rc, Rect r, v4 colour) static v2 push_s8(RenderCtx *rc, v2 pos, v4 colour, u32 font_id, s8 s) { + BEGIN_TIMED_BLOCK(); CachedGlyph *cg; v2 start, end, text_size = {0}; v2 scale = {.x = 1.0f / rc->gl->glyph_bitmap_dim.x, .y = 1.0f / rc->gl->glyph_bitmap_dim.y}; @@ -287,6 +292,8 @@ push_s8(RenderCtx *rc, v2 pos, v4 colour, u32 font_id, s8 s) text_size.x -= cg->advance; text_size.w += cg->width; + END_TIMED_BLOCK(); + return text_size; } @@ -974,8 +981,8 @@ draw_debug_overlay(Term *t, RenderCtx *rc) DebugRecord *dr = debug_records + i; s8 txt = buf; - txt.len = snprintf((char *)txt.data, buf.len, "%29s: %5u %4s %11.02f cycs/hit", - dr->function_name, hits[i], hits[i] > 1? "hits" : "hit", + txt.len = snprintf((char *)txt.data, buf.len, "%29s: %9lu cycs %5u %4s %11.02f cycs/hit", + dr->function_name, cycs[i], hits[i], hits[i] > 1? "hits" : "hit", (f32)cycs[i]/(f32)hits[i]); txt_pos.y = (u32)(txt_pos.y - line_height - line_pad); v2 ts = push_s8(rc, txt_pos, fg, font_id, txt);