Commit: 6f9a8436cf0cc98b4c2c944cc4c2124b69507270
Parent: 60fa5e65f199607d3edda83635fc75f7906201c7
Author: Randy Palamar
Date: Wed, 23 Oct 2024 21:48:47 -0600
remove nasty selection check in fb_clear_region
vis likes to emit CSI that calls this and it was quite expensive
largely due to the stupid branching by checking the selection for
every cell. Checking beforehand with a proper inclusion check
saves about 50% of the cycles. It is still roughly 10x slower than
it should be but this is because of the tricky pointer business.
It may need to be re-explored later.
Diffstat:
3 files changed, 39 insertions(+), 11 deletions(-)
diff --git a/terminal.c b/terminal.c
@@ -149,6 +149,21 @@ is_selected(Selection *s, i32 x, i32 y)
return result;
}
+static b32
+selection_intersects_region(Selection *s, uv2 tl, uv2 br)
+{
+ /* Reports whether the selection range stored in s touches the rectangle
+  * with corners tl (top left) and br (bottom right); both corners are
+  * treated as inclusive. The result is true only when is_valid_range()
+  * accepts the range and at least one of the following holds:
+  *   - whole: r.start.y < tl.y and r.end.y > br.y, i.e. the range starts
+  *     on a row before the rectangle and ends on a row after it
+  *   - the start point (r.start.x, r.start.y) lies inside the rectangle
+  *   - the end point (r.end.x, r.end.y) lies inside the rectangle
+  *
+  * NOTE(review): a range that passes through the rectangle while both of
+  * its endpoints lie outside it and that does not satisfy "whole" (for
+  * example start and end on the same row with start.x < tl.x and
+  * end.x > br.x, or start.y < tl.y with end.y == br.y and end.x > br.x)
+  * matches none of the tests above. This presumes a selection covers the
+  * cells between its start and end - confirm against is_selected().
+  *
+  * TODO: maybe this can be further simplified (eg. with a k-map) */
+ Range r = s->range;
+ b32 valid = is_valid_range(r);
+ b32 whole = r.start.y < tl.y && r.end.y > br.y;
+ b32 start_x = r.start.x >= tl.x && r.start.x <= br.x;
+ b32 start_y = r.start.y >= tl.y && r.start.y <= br.y;
+ b32 end_x = r.end.x >= tl.x && r.end.x <= br.x;
+ b32 end_y = r.end.y >= tl.y && r.end.y <= br.y;
+ b32 result = valid && (whole || (start_y && start_x) || (end_y && end_x));
+ return result;
+}
+
static void
fb_clear_region(Term *t, u32 r1, u32 r2, u32 c1, u32 c2)
{
@@ -169,16 +184,20 @@ fb_clear_region(Term *t, u32 r1, u32 r2, u32 c1, u32 c2)
CLAMP(r1, 0, t->size.h - 1);
CLAMP(r2, 0, t->size.h - 1);
- TermView *tv = t->views + t->view_idx;
- for (u32 r = r1; r <= r2; r++) {
- for (u32 c = c1; c <= c2; c++) {
- tv->fb.rows[r][c].style = t->cursor.style;
- tv->fb.rows[r][c].cp = ' ';
- /* TODO: this shouldn't be in this loop; make a region intersection test */
- if (is_selected(&t->selection, c, r))
- selection_clear(&t->selection);
+ uv2 top_left = {.x = c1, .y = r1};
+ uv2 bottom_right = {.x = c2, .y = r2};
+ if (selection_intersects_region(&t->selection, top_left, bottom_right))
+ selection_clear(&t->selection);
+
+ Row *rows = t->views[t->view_idx].fb.rows;
+ CellStyle cursor_style = t->cursor.style;
+ for (u32 r = top_left.y; r <= bottom_right.y; r++) {
+ for (u32 c = top_left.x; c <= bottom_right.x; c++) {
+ rows[r][c].style = cursor_style;
+ rows[r][c].cp = ' ';
}
}
+
END_TIMED_BLOCK();
}
@@ -1010,6 +1029,7 @@ validate_osc(Term *t, s8 *raw)
static enum escape_moves_cursor_result
check_if_csi_moves_cursor(Term *t, s8 *raw)
{
+ BEGIN_TIMED_BLOCK();
enum escape_moves_cursor_result result = EMC_NORMAL_RETURN;
CSI csi = {0};
if (parse_csi(raw, &csi) == -1)
@@ -1039,6 +1059,7 @@ check_if_csi_moves_cursor(Term *t, s8 *raw)
if (mode != (t->mode & TM_ALTSCREEN))
result = EMC_SWAPPED_SCREEN;
+ END_TIMED_BLOCK();
return result;
}
diff --git a/util.h b/util.h
@@ -75,7 +75,7 @@ typedef union {
f32 E[2];
} v2;
-typedef union {
+typedef __attribute__((aligned(16))) union {
struct { f32 x, y, z, w; };
struct { f32 r, g, b, a; };
f32 E[4];
diff --git a/vtgl.c b/vtgl.c
@@ -215,6 +215,8 @@ get_render_push_buffer_idx(RenderCtx *rc, u32 count)
static void
push_rect_full(RenderCtx *rc, Rect r, v4 colour, v2 min_tex_coord, v2 max_tex_coord)
{
+ BEGIN_TIMED_BLOCK();
+
u32 idx = get_render_push_buffer_idx(rc, 4);
v2 start = r.pos;
v2 end = {.x = r.pos.x + r.size.w, .y = r.pos.y + r.size.h};
@@ -234,6 +236,8 @@ push_rect_full(RenderCtx *rc, Rect r, v4 colour, v2 min_tex_coord, v2 max_tex_co
rpb->colours[idx + 1] = colour;
rpb->colours[idx + 2] = colour;
rpb->colours[idx + 3] = colour;
+
+ END_TIMED_BLOCK();
}
static void
@@ -261,6 +265,7 @@ push_rect(RenderCtx *rc, Rect r, v4 colour)
static v2
push_s8(RenderCtx *rc, v2 pos, v4 colour, u32 font_id, s8 s)
{
+ BEGIN_TIMED_BLOCK();
CachedGlyph *cg;
v2 start, end, text_size = {0};
v2 scale = {.x = 1.0f / rc->gl->glyph_bitmap_dim.x, .y = 1.0f / rc->gl->glyph_bitmap_dim.y};
@@ -287,6 +292,8 @@ push_s8(RenderCtx *rc, v2 pos, v4 colour, u32 font_id, s8 s)
text_size.x -= cg->advance;
text_size.w += cg->width;
+ END_TIMED_BLOCK();
+
return text_size;
}
@@ -974,8 +981,8 @@ draw_debug_overlay(Term *t, RenderCtx *rc)
DebugRecord *dr = debug_records + i;
s8 txt = buf;
- txt.len = snprintf((char *)txt.data, buf.len, "%29s: %5u %4s %11.02f cycs/hit",
- dr->function_name, hits[i], hits[i] > 1? "hits" : "hit",
+ txt.len = snprintf((char *)txt.data, buf.len, "%29s: %9lu cycs %5u %4s %11.02f cycs/hit",
+ dr->function_name, cycs[i], hits[i], hits[i] > 1? "hits" : "hit",
(f32)cycs[i]/(f32)hits[i]);
txt_pos.y = (u32)(txt_pos.y - line_height - line_pad);
v2 ts = push_s8(rc, txt_pos, fg, font_id, txt);