Commit: 7799e7695bfe6f863c15a872c4549f7c73741d94
Parent: 26e708a0acd69e6e7c9ecfe1ed0a0feb27251105
Author: Randy Palamar
Date: Sun, 8 Dec 2024 16:25:13 -0700
fix multithreaded shader reloading and add work queue
The main thread does not hold the GL context, so it cannot reload
shaders. To solve this, we add a work queue for the render thread,
which can also be used for other things.
Diffstat:
6 files changed, 151 insertions(+), 81 deletions(-)
diff --git a/debug.h b/debug.h
@@ -105,11 +105,11 @@ typedef struct {
static DebugTable g_debug_table;
#define RECORD_DEBUG_EVENT_COMMON(counter, event_type) \
- u64 event_index = __atomic_fetch_add(&g_debug_table.event_array_event_index, 1, __ATOMIC_RELAXED); \
- ASSERT((event_index & 0xFFFFFFFF) < MAX_DEBUG_EVENT_COUNT); \
- DebugEvent *event = g_debug_table.events[event_index >> 32] + (event_index & 0xFFFFFFFF); \
- event->clock = __rdtsc(); \
- event->metadata_index = counter; \
+ u64 event_index = atomic_fetch_add(&g_debug_table.event_array_event_index, 1); \
+ ASSERT((event_index & 0xFFFFFFFF) < MAX_DEBUG_EVENT_COUNT); \
+ DebugEvent *event = g_debug_table.events[event_index >> 32] + (event_index & 0xFFFFFFFF); \
+ event->clock = __rdtsc(); \
+ event->metadata_index = counter; \
event->type = event_type
#define RECORD_DEBUG_EVENT(counter, event_type) \
diff --git a/platform_linux_common.c b/platform_linux_common.c
@@ -55,8 +55,6 @@
#define OS_MAP_READ PROT_READ
#define OS_MAP_PRIVATE MAP_PRIVATE
-#define atomic_exchange_n(ptr, val) __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST)
-
struct __attribute__((aligned(16))) stack_base {
void (*entry)(struct stack_base *stack);
Arena thread_arena;
diff --git a/util.c b/util.c
@@ -90,6 +90,56 @@ normalize_range(Range r)
return result;
}
+/* NOTE(rnp): based on nullprogram's lock-free, concurrent,
+ * generic queue in 32 bits */
+static i32
+work_queue_push(u32 *q, u32 capacity)
+{
+ ASSERT(ISPOWEROFTWO(capacity));
+ u32 r = atomic_load(q);
+ i32 mask = capacity - 1;
+ i32 head = r & mask;
+ i32 tail = (r >> 16) & mask;
+ i32 next = (head + 1) & mask;
+ /* NOTE(rnp): prevent an overflow into the tail on commit */
+ if (r & 0x8000) atomic_and(q, ~0x8000u);
+ return next == tail ? -1 : head;
+}
+
+static void
+work_queue_push_commit(u32 *q)
+{
+ atomic_fetch_add(q, 1);
+}
+
+static i32
+work_queue_pop(u32 *q, u32 capacity)
+{
+ ASSERT(ISPOWEROFTWO(capacity));
+ u32 r = atomic_load(q);
+ i32 mask = capacity - 1;
+ i32 head = r & mask;
+ i32 tail = (r >> 16) & mask;
+ return head == tail ? -1 : tail;
+}
+
+static void
+work_queue_pop_commit(u32 *q)
+{
+ atomic_fetch_add(q, 0x10000u);
+}
+
+static b32
+work_queue_empty(u32 *q, u32 capacity)
+{
+ ASSERT(ISPOWEROFTWO(capacity));
+ u32 r = atomic_load(q);
+ i32 mask = capacity - 1;
+ i32 head = r & mask;
+ i32 tail = (r >> 16) & mask;
+ return head == tail;
+}
+
static void
mem_copy(void *src, void *dest, size len)
{
diff --git a/util.h b/util.h
@@ -59,7 +59,6 @@ typedef struct {
} while(0)
typedef struct Variable {
- enum variable_type type;
union {
b32 b32;
u32 u32;
@@ -73,6 +72,7 @@ typedef struct Variable {
VariableLink group;
SLLVariableVector vector;
};
+ enum variable_type type;
} Variable;
enum cell_attribute {
@@ -148,14 +148,6 @@ typedef struct {
RingBuf log;
LineBuf lines;
Framebuffer fb;
- /* NOTE: the position of the cursor the last time a new line was blitted
- * and the index of the line. This is needed because we blit whole lines
- * at a time unlike traditional terminal emulators which just operate as
- * a state machine. Any time a line hasn't played to completion we must
- * restart it from the original location lest it unintentionally cause a
- * screen scroll. */
- iv2 last_cursor_pos;
- size last_line_idx;
} TermView;
enum terminal_mode {
@@ -433,6 +425,17 @@ typedef struct RenderCtx {
Arena a;
} RenderCtx;
+enum work_queue_entry_type {
+ WQ_FILL_RENDERBUFFER,
+ WQ_SHADER_RELOAD,
+ WQ_WINDOW_RESIZE,
+};
+
+typedef struct {
+ void *ctx;
+ enum work_queue_entry_type type;
+} work_queue_entry;
+
typedef struct Term {
GLCtx gl;
FontAtlas fa;
@@ -440,6 +443,10 @@ typedef struct Term {
Arena arena_for_frame;
TempArena temp_arena;
+ work_queue_entry *work_queue_items;
+ u32 work_queue_capacity;
+ u32 work_queue;
+
InteractionState interaction;
Selection selection;
diff --git a/vtgl.c b/vtgl.c
@@ -34,11 +34,21 @@
" gl_Position = u_Pmat * vec4(vertex_position, 0.0, 1.0);\n" \
"}\n"
-typedef struct {
- TerminalMemory *memory;
- s8 info;
- u32 stage;
-} shader_reload_ctx;
+static void
+set_projection_matrix(GLCtx *gl, u32 stage)
+{
+ f32 w = gl->window_size.w;
+ f32 h = gl->window_size.h;
+
+ f32 pmat[4 * 4] = {
+ 2.0/w, 0.0, 0.0, -1.0,
+ 0.0, 2.0/h, 0.0, -1.0,
+ 0.0, 0.0, -1.0, 0.0,
+ 0.0, 0.0, 0.0, 1.0,
+ };
+
+ glProgramUniformMatrix4fv(gl->programs[stage], SHADER_PMAT_LOC, 1, GL_TRUE, pmat);
+}
static u32
compile_shader(Arena a, u32 type, s8 shader)
@@ -93,6 +103,13 @@ program_from_shader_text(s8 vertex, s8 fragment, Arena a)
return pid;
}
+typedef struct {
+ Term *t;
+ u8 *path;
+ s8 info;
+ u32 stage;
+} queue_shader_reload_ctx;
+
static void
update_uniforms(GLCtx *gl, enum shader_stages stage)
{
@@ -109,29 +126,20 @@ update_uniforms(GLCtx *gl, enum shader_stages stage)
}
}
-static PLATFORM_FILE_WATCH_CALLBACK_FN(reload_shader)
+static void
+reload_shader(GLCtx *gl, PlatformAPI *platform, u8 *path, u32 stage, s8 info, Arena a)
{
- shader_reload_ctx *ctx = user_ctx;
- PlatformAPI *platform = &ctx->memory->platform_api;
- Term *t = ctx->memory->memory;
- Arena a = t->arena_for_frame;
- Stream *err = &t->error_stream;
-
s8 fs_text = platform->read_file(path, &a);
if (fs_text.len) {
u32 program = program_from_shader_text(s8(VERTEX_SHADER_TEXT), fs_text, a);
if (program) {
- glDeleteProgram(t->gl.programs[ctx->stage]);
- t->gl.programs[ctx->stage] = program;
- update_uniforms(&t->gl, ctx->stage);
- stream_push_s8(err, ctx->info);
+ glDeleteProgram(gl->programs[stage]);
+ gl->programs[stage] = program;
+ update_uniforms(gl, stage);
+ set_projection_matrix(gl, stage);
}
}
-
- if (err->widx) {
- os_write_err_msg(stream_to_s8(err));
- err->widx = 0;
- }
+ if (info.len) os_write_err_msg(info);
}
static s8 fs_name[SHADER_COUNT] = {
@@ -141,32 +149,35 @@ static s8 fs_name[SHADER_COUNT] = {
};
static void
-reload_all_shaders(TerminalMemory *memory)
+reload_all_shaders(GLCtx *gl, PlatformAPI *platform, Arena a)
{
- PlatformAPI *platform = &memory->platform_api;
- Term *t = memory->memory;
-
- TempArena temp_memory = begin_temp_arena(&t->arena_for_frame);
-
- Stream fs_path = stream_alloc(&t->arena_for_frame, KB(4));
+ Stream fs_path = stream_alloc(&a, KB(4));
stream_push_s8(&fs_path, g_shader_path_prefix);
if (fs_path.widx && fs_path.buf[fs_path.widx - 1] != platform->path_separator)
stream_push_byte(&fs_path, platform->path_separator);
- shader_reload_ctx ctx = {0};
- ctx.memory = memory;
-
i32 sidx = fs_path.widx;
for (u32 i = 0; i < SHADER_COUNT; i++) {
stream_push_s8(&fs_path, fs_name[i]);
stream_push_byte(&fs_path, 0);
- ctx.stage = i;
- reload_shader(fs_path.buf, &ctx);
+ reload_shader(gl, platform, fs_path.buf, i, (s8){0}, a);
fs_path.widx = sidx;
}
os_write_err_msg(s8("Reloaded Shaders\n"));
- end_temp_arena(temp_memory);
+}
+
+static PLATFORM_FILE_WATCH_CALLBACK_FN(queue_shader_reload)
+{
+ queue_shader_reload_ctx *ctx = user_ctx;
+ i32 index = work_queue_push(&ctx->t->work_queue, ctx->t->work_queue_capacity);
+ /* NOTE(rnp): if we ever fill this up we need to resize the queue */
+ ASSERT(index != -1);
+ work_queue_push_commit(&ctx->t->work_queue);
+
+ ctx->path = path;
+ ctx->t->work_queue_items[index].type = WQ_SHADER_RELOAD;
+ ctx->t->work_queue_items[index].ctx = ctx;
}
static v4
@@ -192,24 +203,6 @@ pressed_last_frame(ButtonState *button)
return result;
}
-static void
-set_projection_matrix(GLCtx *gl)
-{
- f32 w = gl->window_size.w;
- f32 h = gl->window_size.h;
-
- f32 pmat[4 * 4] = {
- 2.0/w, 0.0, 0.0, -1.0,
- 0.0, 2.0/h, 0.0, -1.0,
- 0.0, 0.0, -1.0, 0.0,
- 0.0, 0.0, 0.0, 1.0,
- };
-
- glProgramUniformMatrix4fv(gl->programs[SHADER_RENDER], SHADER_PMAT_LOC, 1, GL_TRUE, pmat);
- glProgramUniformMatrix4fv(gl->programs[SHADER_RECTS], SHADER_PMAT_LOC, 1, GL_TRUE, pmat);
- glProgramUniformMatrix4fv(gl->programs[SHADER_POST], SHADER_PMAT_LOC, 1, GL_TRUE, pmat);
-}
-
static v2
get_cell_size(FontAtlas *fa)
{
@@ -307,7 +300,8 @@ resize(Term *t, PlatformAPI *platform, iv2 window_size)
sp->term_size_in_pixels = gl->window_size;
sp->term_size_in_cells = t->size;
- set_projection_matrix(gl);
+ for (u32 i = 0; i < SHADER_COUNT; i++)
+ set_projection_matrix(gl, i);
gl->flags &= ~RESIZE_RENDERER;
}
@@ -1107,7 +1101,10 @@ DEBUG_EXPORT VTGL_INITIALIZE_FN(vtgl_initialize)
initialize_framebuffer(&t->views[0].fb, t->size);
initialize_framebuffer(&t->views[1].fb, t->size);
- shader_reload_ctx *shader_ctxs = alloc(&a, shader_reload_ctx, SHADER_COUNT);
+ t->work_queue_items = alloc(&a, typeof(*t->work_queue_items), 1 << 6);
+ t->work_queue_capacity = 1 << 6;
+
+ queue_shader_reload_ctx *reload_ctxs = alloc(&a, typeof(*reload_ctxs), SHADER_COUNT);
s8 shader_infos[SHADER_COUNT] = {
[SHADER_POST] = s8("Post Processing Shader Reloaded!\n"),
@@ -1121,13 +1118,13 @@ DEBUG_EXPORT VTGL_INITIALIZE_FN(vtgl_initialize)
if (path.widx && path.buf[path.widx - 1] != memory->platform_api.path_separator)
stream_push_byte(&path, memory->platform_api.path_separator);
- shader_reload_ctx *src = shader_ctxs + i;
- src->info = shader_infos[i];
- src->stage = i;
- src->memory = memory;
+ queue_shader_reload_ctx *src = reload_ctxs + i;
+ src->info = shader_infos[i];
+ src->stage = i;
+ src->t = t;
stream_push_s8(&path, fs_name[i]);
stream_push_byte(&path, 0);
- memory->platform_api.add_file_watch(path.buf, reload_shader, src);
+ memory->platform_api.add_file_watch(path.buf, queue_shader_reload, src);
a.beg = path.buf + path.widx;
}
@@ -1212,7 +1209,7 @@ DEBUG_EXPORT VTGL_INITIALIZE_FN(vtgl_initialize)
glActiveTexture(GL_TEXTURE0);
- reload_all_shaders(memory);
+ reload_all_shaders(&t->gl, &memory->platform_api, a);
return requested_size;
}
@@ -1221,9 +1218,7 @@ DEBUG_EXPORT VTGL_ACTIVE_SELECTION_FN(vtgl_active_selection)
{
Term *t = memory->memory;
Range result = t->selection.range;
- if (out)
- stream_push_selection(out, t->views[t->view_idx].fb.rows,
- t->selection.range, t->size.w);
+ if (out) stream_push_selection(out, t->views[t->view_idx].fb.rows, result, t->size.w);
return result;
}
@@ -1237,8 +1232,22 @@ DEBUG_EXPORT VTGL_RENDER_FRAME_FN(vtgl_render_frame)
TempArena temp_arena = begin_temp_arena(&arena);
+ i32 queue_item;
+ while ((queue_item = work_queue_pop(&t->work_queue, t->work_queue_capacity)) != -1) {
+ work_queue_pop_commit(&t->work_queue);
+ work_queue_entry *entry = t->work_queue_items + queue_item;
+ switch (entry->type) {
+ case WQ_SHADER_RELOAD: {
+ queue_shader_reload_ctx *ctx = entry->ctx;
+ reload_shader(&t->gl, &memory->platform_api, ctx->path, ctx->stage,
+ ctx->info, arena);
+ } break;
+ default: INVALID_CODE_PATH;
+ }
+ }
+
if (input->executable_reloaded) {
- reload_all_shaders(memory);
+ reload_all_shaders(&t->gl, &memory->platform_api, arena);
}
/* NOTE: default state which can be overwritten later in the frame */
@@ -1380,7 +1389,8 @@ DEBUG_EXPORT VTGL_FRAME_STEP_FN(vtgl_frame_step)
END_TIMED_BLOCK();
- return t->gl.queued_render || input->window_refreshed || t->gl.flags & DRAW_DEBUG_OVERLAY;
+ return t->gl.queued_render || input->window_refreshed || t->gl.flags & DRAW_DEBUG_OVERLAY
+ || !work_queue_empty(&t->work_queue, t->work_queue_capacity);
}
#ifdef _DEBUG
diff --git a/vtgl.h b/vtgl.h
@@ -27,6 +27,11 @@
#define DEBUG_EXPORT static
#endif
+#define atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE);
+#define atomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_RELEASE);
+#define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE)
+#define atomic_exchange_n(ptr, val) __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST)
+
#define PI 3.1415926535897932384f
#define KB(a) ((a) << 10ULL)