vtgl

terminal emulator implemented in OpenGL
git clone anongit@rnpnr.xyz:vtgl.git
Log | Files | Refs | Feed | LICENSE

Commit: 7799e7695bfe6f863c15a872c4549f7c73741d94
Parent: 26e708a0acd69e6e7c9ecfe1ed0a0feb27251105
Author: Randy Palamar
Date:   Sun,  8 Dec 2024 16:25:13 -0700

fix multithreaded shader reloading and add work queue

The main thread does not hold the GL context, so it cannot reload
shaders. To solve this, we add a work queue for the render thread,
which can also be used for other things.

Diffstat:
M debug.h | 10 +++++-----
M platform_linux_common.c | 2 --
M util.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
M util.h | 25 ++++++++++++++++---------
M vtgl.c | 140 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
M vtgl.h | 5 +++++
6 files changed, 151 insertions(+), 81 deletions(-)

diff --git a/debug.h b/debug.h @@ -105,11 +105,11 @@ typedef struct { static DebugTable g_debug_table; #define RECORD_DEBUG_EVENT_COMMON(counter, event_type) \ - u64 event_index = __atomic_fetch_add(&g_debug_table.event_array_event_index, 1, __ATOMIC_RELAXED); \ - ASSERT((event_index & 0xFFFFFFFF) < MAX_DEBUG_EVENT_COUNT); \ - DebugEvent *event = g_debug_table.events[event_index >> 32] + (event_index & 0xFFFFFFFF); \ - event->clock = __rdtsc(); \ - event->metadata_index = counter; \ + u64 event_index = atomic_fetch_add(&g_debug_table.event_array_event_index, 1); \ + ASSERT((event_index & 0xFFFFFFFF) < MAX_DEBUG_EVENT_COUNT); \ + DebugEvent *event = g_debug_table.events[event_index >> 32] + (event_index & 0xFFFFFFFF); \ + event->clock = __rdtsc(); \ + event->metadata_index = counter; \ event->type = event_type #define RECORD_DEBUG_EVENT(counter, event_type) \ diff --git a/platform_linux_common.c b/platform_linux_common.c @@ -55,8 +55,6 @@ #define OS_MAP_READ PROT_READ #define OS_MAP_PRIVATE MAP_PRIVATE -#define atomic_exchange_n(ptr, val) __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST) - struct __attribute__((aligned(16))) stack_base { void (*entry)(struct stack_base *stack); Arena thread_arena; diff --git a/util.c b/util.c @@ -90,6 +90,56 @@ normalize_range(Range r) return result; } +/* NOTE(rnp): based on nullprogram's lock-free, concurrent, + * generic queue in 32 bits */ +static i32 +work_queue_push(u32 *q, u32 capacity) +{ + ASSERT(ISPOWEROFTWO(capacity)); + u32 r = atomic_load(q); + i32 mask = capacity - 1; + i32 head = r & mask; + i32 tail = (r >> 16) & mask; + i32 next = (head + 1) & mask; + /* NOTE(rnp): prevent an overflow into the tail on commit */ + if (r & 0x8000) atomic_and(q, ~0x8000u); + return next == tail ? 
-1 : head; +} + +static void +work_queue_push_commit(u32 *q) +{ + atomic_fetch_add(q, 1); +} + +static i32 +work_queue_pop(u32 *q, u32 capacity) +{ + ASSERT(ISPOWEROFTWO(capacity)); + u32 r = atomic_load(q); + i32 mask = capacity - 1; + i32 head = r & mask; + i32 tail = (r >> 16) & mask; + return head == tail ? -1 : tail; +} + +static void +work_queue_pop_commit(u32 *q) +{ + atomic_fetch_add(q, 0x10000u); +} + +static b32 +work_queue_empty(u32 *q, u32 capacity) +{ + ASSERT(ISPOWEROFTWO(capacity)); + u32 r = atomic_load(q); + i32 mask = capacity - 1; + i32 head = r & mask; + i32 tail = (r >> 16) & mask; + return head == tail; +} + static void mem_copy(void *src, void *dest, size len) { diff --git a/util.h b/util.h @@ -59,7 +59,6 @@ typedef struct { } while(0) typedef struct Variable { - enum variable_type type; union { b32 b32; u32 u32; @@ -73,6 +72,7 @@ typedef struct Variable { VariableLink group; SLLVariableVector vector; }; + enum variable_type type; } Variable; enum cell_attribute { @@ -148,14 +148,6 @@ typedef struct { RingBuf log; LineBuf lines; Framebuffer fb; - /* NOTE: the position of the cursor the last time a new line was blitted - * and the index of the line. This is needed because we blit whole lines - * at a time unlike traditional terminal emulators which just operate as - * a state machine. Any time a line hasn't played to completion we must - * restart it from the original location lest it unintentionally cause a - * screen scroll. 
*/ - iv2 last_cursor_pos; - size last_line_idx; } TermView; enum terminal_mode { @@ -433,6 +425,17 @@ typedef struct RenderCtx { Arena a; } RenderCtx; +enum work_queue_entry_type { + WQ_FILL_RENDERBUFFER, + WQ_SHADER_RELOAD, + WQ_WINDOW_RESIZE, +}; + +typedef struct { + void *ctx; + enum work_queue_entry_type type; +} work_queue_entry; + typedef struct Term { GLCtx gl; FontAtlas fa; @@ -440,6 +443,10 @@ typedef struct Term { Arena arena_for_frame; TempArena temp_arena; + work_queue_entry *work_queue_items; + u32 work_queue_capacity; + u32 work_queue; + InteractionState interaction; Selection selection; diff --git a/vtgl.c b/vtgl.c @@ -34,11 +34,21 @@ " gl_Position = u_Pmat * vec4(vertex_position, 0.0, 1.0);\n" \ "}\n" -typedef struct { - TerminalMemory *memory; - s8 info; - u32 stage; -} shader_reload_ctx; +static void +set_projection_matrix(GLCtx *gl, u32 stage) +{ + f32 w = gl->window_size.w; + f32 h = gl->window_size.h; + + f32 pmat[4 * 4] = { + 2.0/w, 0.0, 0.0, -1.0, + 0.0, 2.0/h, 0.0, -1.0, + 0.0, 0.0, -1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + }; + + glProgramUniformMatrix4fv(gl->programs[stage], SHADER_PMAT_LOC, 1, GL_TRUE, pmat); +} static u32 compile_shader(Arena a, u32 type, s8 shader) @@ -93,6 +103,13 @@ program_from_shader_text(s8 vertex, s8 fragment, Arena a) return pid; } +typedef struct { + Term *t; + u8 *path; + s8 info; + u32 stage; +} queue_shader_reload_ctx; + static void update_uniforms(GLCtx *gl, enum shader_stages stage) { @@ -109,29 +126,20 @@ update_uniforms(GLCtx *gl, enum shader_stages stage) } } -static PLATFORM_FILE_WATCH_CALLBACK_FN(reload_shader) +static void +reload_shader(GLCtx *gl, PlatformAPI *platform, u8 *path, u32 stage, s8 info, Arena a) { - shader_reload_ctx *ctx = user_ctx; - PlatformAPI *platform = &ctx->memory->platform_api; - Term *t = ctx->memory->memory; - Arena a = t->arena_for_frame; - Stream *err = &t->error_stream; - s8 fs_text = platform->read_file(path, &a); if (fs_text.len) { u32 program = 
program_from_shader_text(s8(VERTEX_SHADER_TEXT), fs_text, a); if (program) { - glDeleteProgram(t->gl.programs[ctx->stage]); - t->gl.programs[ctx->stage] = program; - update_uniforms(&t->gl, ctx->stage); - stream_push_s8(err, ctx->info); + glDeleteProgram(gl->programs[stage]); + gl->programs[stage] = program; + update_uniforms(gl, stage); + set_projection_matrix(gl, stage); } } - - if (err->widx) { - os_write_err_msg(stream_to_s8(err)); - err->widx = 0; - } + if (info.len) os_write_err_msg(info); } static s8 fs_name[SHADER_COUNT] = { @@ -141,32 +149,35 @@ static s8 fs_name[SHADER_COUNT] = { }; static void -reload_all_shaders(TerminalMemory *memory) +reload_all_shaders(GLCtx *gl, PlatformAPI *platform, Arena a) { - PlatformAPI *platform = &memory->platform_api; - Term *t = memory->memory; - - TempArena temp_memory = begin_temp_arena(&t->arena_for_frame); - - Stream fs_path = stream_alloc(&t->arena_for_frame, KB(4)); + Stream fs_path = stream_alloc(&a, KB(4)); stream_push_s8(&fs_path, g_shader_path_prefix); if (fs_path.widx && fs_path.buf[fs_path.widx - 1] != platform->path_separator) stream_push_byte(&fs_path, platform->path_separator); - shader_reload_ctx ctx = {0}; - ctx.memory = memory; - i32 sidx = fs_path.widx; for (u32 i = 0; i < SHADER_COUNT; i++) { stream_push_s8(&fs_path, fs_name[i]); stream_push_byte(&fs_path, 0); - ctx.stage = i; - reload_shader(fs_path.buf, &ctx); + reload_shader(gl, platform, fs_path.buf, i, (s8){0}, a); fs_path.widx = sidx; } os_write_err_msg(s8("Reloaded Shaders\n")); - end_temp_arena(temp_memory); +} + +static PLATFORM_FILE_WATCH_CALLBACK_FN(queue_shader_reload) +{ + queue_shader_reload_ctx *ctx = user_ctx; + i32 index = work_queue_push(&ctx->t->work_queue, ctx->t->work_queue_capacity); + /* NOTE(rnp): if we ever fill this up we need to resize the queue */ + ASSERT(index != -1); + work_queue_push_commit(&ctx->t->work_queue); + + ctx->path = path; + ctx->t->work_queue_items[index].type = WQ_SHADER_RELOAD; + 
ctx->t->work_queue_items[index].ctx = ctx; } static v4 @@ -192,24 +203,6 @@ pressed_last_frame(ButtonState *button) return result; } -static void -set_projection_matrix(GLCtx *gl) -{ - f32 w = gl->window_size.w; - f32 h = gl->window_size.h; - - f32 pmat[4 * 4] = { - 2.0/w, 0.0, 0.0, -1.0, - 0.0, 2.0/h, 0.0, -1.0, - 0.0, 0.0, -1.0, 0.0, - 0.0, 0.0, 0.0, 1.0, - }; - - glProgramUniformMatrix4fv(gl->programs[SHADER_RENDER], SHADER_PMAT_LOC, 1, GL_TRUE, pmat); - glProgramUniformMatrix4fv(gl->programs[SHADER_RECTS], SHADER_PMAT_LOC, 1, GL_TRUE, pmat); - glProgramUniformMatrix4fv(gl->programs[SHADER_POST], SHADER_PMAT_LOC, 1, GL_TRUE, pmat); -} - static v2 get_cell_size(FontAtlas *fa) { @@ -307,7 +300,8 @@ resize(Term *t, PlatformAPI *platform, iv2 window_size) sp->term_size_in_pixels = gl->window_size; sp->term_size_in_cells = t->size; - set_projection_matrix(gl); + for (u32 i = 0; i < SHADER_COUNT; i++) + set_projection_matrix(gl, i); gl->flags &= ~RESIZE_RENDERER; } @@ -1107,7 +1101,10 @@ DEBUG_EXPORT VTGL_INITIALIZE_FN(vtgl_initialize) initialize_framebuffer(&t->views[0].fb, t->size); initialize_framebuffer(&t->views[1].fb, t->size); - shader_reload_ctx *shader_ctxs = alloc(&a, shader_reload_ctx, SHADER_COUNT); + t->work_queue_items = alloc(&a, typeof(*t->work_queue_items), 1 << 6); + t->work_queue_capacity = 1 << 6; + + queue_shader_reload_ctx *reload_ctxs = alloc(&a, typeof(*reload_ctxs), SHADER_COUNT); s8 shader_infos[SHADER_COUNT] = { [SHADER_POST] = s8("Post Processing Shader Reloaded!\n"), @@ -1121,13 +1118,13 @@ DEBUG_EXPORT VTGL_INITIALIZE_FN(vtgl_initialize) if (path.widx && path.buf[path.widx - 1] != memory->platform_api.path_separator) stream_push_byte(&path, memory->platform_api.path_separator); - shader_reload_ctx *src = shader_ctxs + i; - src->info = shader_infos[i]; - src->stage = i; - src->memory = memory; + queue_shader_reload_ctx *src = reload_ctxs + i; + src->info = shader_infos[i]; + src->stage = i; + src->t = t; stream_push_s8(&path, fs_name[i]); 
stream_push_byte(&path, 0); - memory->platform_api.add_file_watch(path.buf, reload_shader, src); + memory->platform_api.add_file_watch(path.buf, queue_shader_reload, src); a.beg = path.buf + path.widx; } @@ -1212,7 +1209,7 @@ DEBUG_EXPORT VTGL_INITIALIZE_FN(vtgl_initialize) glActiveTexture(GL_TEXTURE0); - reload_all_shaders(memory); + reload_all_shaders(&t->gl, &memory->platform_api, a); return requested_size; } @@ -1221,9 +1218,7 @@ DEBUG_EXPORT VTGL_ACTIVE_SELECTION_FN(vtgl_active_selection) { Term *t = memory->memory; Range result = t->selection.range; - if (out) - stream_push_selection(out, t->views[t->view_idx].fb.rows, - t->selection.range, t->size.w); + if (out) stream_push_selection(out, t->views[t->view_idx].fb.rows, result, t->size.w); return result; } @@ -1237,8 +1232,22 @@ DEBUG_EXPORT VTGL_RENDER_FRAME_FN(vtgl_render_frame) TempArena temp_arena = begin_temp_arena(&arena); + i32 queue_item; + while ((queue_item = work_queue_pop(&t->work_queue, t->work_queue_capacity)) != -1) { + work_queue_pop_commit(&t->work_queue); + work_queue_entry *entry = t->work_queue_items + queue_item; + switch (entry->type) { + case WQ_SHADER_RELOAD: { + queue_shader_reload_ctx *ctx = entry->ctx; + reload_shader(&t->gl, &memory->platform_api, ctx->path, ctx->stage, + ctx->info, arena); + } break; + default: INVALID_CODE_PATH; + } + } + if (input->executable_reloaded) { - reload_all_shaders(memory); + reload_all_shaders(&t->gl, &memory->platform_api, arena); } /* NOTE: default state which can be overwritten later in the frame */ @@ -1380,7 +1389,8 @@ DEBUG_EXPORT VTGL_FRAME_STEP_FN(vtgl_frame_step) END_TIMED_BLOCK(); - return t->gl.queued_render || input->window_refreshed || t->gl.flags & DRAW_DEBUG_OVERLAY; + return t->gl.queued_render || input->window_refreshed || t->gl.flags & DRAW_DEBUG_OVERLAY + || !work_queue_empty(&t->work_queue, t->work_queue_capacity); } #ifdef _DEBUG diff --git a/vtgl.h b/vtgl.h @@ -27,6 +27,11 @@ #define DEBUG_EXPORT static #endif +#define 
atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE); +#define atomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_RELEASE); +#define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) +#define atomic_exchange_n(ptr, val) __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST) + #define PI 3.1415926535897932384f #define KB(a) ((a) << 10ULL)