Commit: 1e76faef3c5047f8eb14f7fadc64af28f775850a
Parent: f60c3ba8127f72cfee8d55a130526dd73ecebb3d
Author: Randy Palamar
Date:   Thu, 10 Apr 2025 05:33:34 -0600
core/lib: collapse upload commands into single method
Diffstat:
7 files changed, 171 insertions(+), 135 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -1,10 +1,15 @@
 /* See LICENSE for license details. */
 /* TODO(rnp):
- * - make channel_mapping, sparse_elements, focal_vectors into buffer backed textures.
- *   this way they can all use the same UPLOAD_SUBBUFFER command
- * - bake compute shader uniform indices (use push_compute_shader_header)
- * - reinvestigate ring buffer raw_data_ssbo ?
- * - START_COMPUTE command ?
+ * [ ]: bake compute shader uniform indices (use push_compute_shader_header)
+ * [ ]: refactor: BeamformGPUComputeContext
+ * [ ]: refactor: compute shader timers should be generated based on the pipeline stage limit
+ * [ ]: reinvestigate ring buffer raw_data_ssbo
+ *      - to minimize latency the main thread should manage the subbuffer upload so that the
+ *        compute thread can just keep computing. This way we can keep the compute thread busy
+ *        with work while we image.
+ *      - In particular we will potentially need multiple GPUComputeContexts so that we
+ *        can overwrite one while the other is in use.
+ *      - make use of glFenceSync to guard buffer uploads
  */
 
 #include "beamformer.h"
@@ -31,14 +36,6 @@ typedef struct {
 	u32 needed_frames;
 } ComputeFrameIterator;
 
-static iz
-decoded_data_size(ComputeShaderCtx *cs)
-{
-	uv4 dim    = cs->dec_data_dim;
-	iz  result = 2 * sizeof(f32) * dim.x * dim.y * dim.z;
-	return result;
-}
-
 static uv3
 make_valid_test_dim(uv3 in)
 {
@@ -110,14 +107,13 @@ alloc_beamform_frame(GLParams *gp, BeamformFrame *out, ComputeShaderStats *out_s
 	}
 }
 
-static void
-alloc_shader_storage(BeamformerCtx *ctx, Arena a)
+function void
+alloc_shader_storage(BeamformerCtx *ctx, u32 rf_raw_size, Arena a)
 {
-	ComputeShaderCtx *cs     = &ctx->csctx;
+	ComputeShaderCtx     *cs = &ctx->csctx;
 	BeamformerParameters *bp = &ctx->shared_memory->parameters;
 
 	uv4 dec_data_dim = bp->dec_data_dim;
-	u32 rf_raw_size  = ctx->shared_memory->raw_data_size;
 	cs->dec_data_dim = dec_data_dim;
 	cs->rf_raw_size  = rf_raw_size;
 
@@ -130,7 +126,7 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
 	glNamedBufferStorage(cs->raw_data_ssbo, rf_raw_size, 0, storage_flags);
 	LABEL_GL_OBJECT(GL_BUFFER, cs->raw_data_ssbo, s8("Raw_RF_SSBO"));
 
-	iz rf_decoded_size = decoded_data_size(cs);
+	iz rf_decoded_size = 2 * sizeof(f32) * cs->dec_data_dim.x * cs->dec_data_dim.y * cs->dec_data_dim.z;
 	Stream label = stream_alloc(&a, 256);
 	stream_append_s8(&label, s8("Decoded_RF_SSBO_"));
 	u32 s_widx = label.widx;
@@ -575,64 +571,55 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
 				#undef X
 			}
 		} break;
-		case BW_UPLOAD_CHANNEL_MAPPING: {
-			ASSERT(!atomic_load(&ctx->shared_memory->channel_mapping_sync));
-			if (!cs->channel_mapping_texture) {
-				glCreateTextures(GL_TEXTURE_1D, 1, &cs->channel_mapping_texture);
-				glTextureStorage1D(cs->channel_mapping_texture, 1, GL_R16I,
-				                   ARRAY_COUNT(sm->channel_mapping));
-				LABEL_GL_OBJECT(GL_TEXTURE, cs->channel_mapping_texture, s8("Channel_Mapping"));
-			}
-			glTextureSubImage1D(cs->channel_mapping_texture, 0, 0,
-			                    ARRAY_COUNT(sm->channel_mapping), GL_RED_INTEGER,
-			                    GL_SHORT, sm->channel_mapping);
-		} break;
-		case BW_UPLOAD_FOCAL_VECTORS: {
-			ASSERT(!atomic_load(&ctx->shared_memory->focal_vectors_sync));
-			if (!cs->focal_vectors_texture) {
-				glCreateTextures(GL_TEXTURE_1D, 1, &cs->focal_vectors_texture);
-				glTextureStorage1D(cs->focal_vectors_texture, 1, GL_RG32F,
-				                   ARRAY_COUNT(sm->focal_vectors));
-				LABEL_GL_OBJECT(GL_TEXTURE, cs->focal_vectors_texture, s8("Focal_Vectors"));
-			}
-			glTextureSubImage1D(cs->focal_vectors_texture, 0, 0,
-			                    ARRAY_COUNT(sm->focal_vectors), GL_RG,
-			                    GL_FLOAT, sm->focal_vectors);
-		} break;
-		case BW_UPLOAD_PARAMETERS:
-		case BW_UPLOAD_PARAMETERS_HEAD:
-		case BW_UPLOAD_PARAMETERS_UI: {
+		case BW_UPLOAD_BUFFER: {
 			ASSERT(!atomic_load((i32 *)(barrier_offset + work->completion_barrier)));
-			glNamedBufferSubData(cs->shared_ubo, 0, sizeof(ctx->shared_memory->parameters),
-				             &ctx->shared_memory->parameters);
-			ctx->ui_read_params = work->type != BW_UPLOAD_PARAMETERS_HEAD && !work->generic;
-		} break;
-		case BW_UPLOAD_RF_DATA: {
-			ASSERT(!atomic_load(&ctx->shared_memory->raw_data_sync));
+			BeamformerUploadContext *uc = &work->upload_context;
+			u32 tex_type, tex_format, tex_element_count, tex_1d = 0, buffer = 0;
+			switch (uc->kind) {
+			case BU_KIND_CHANNEL_MAPPING: {
+				tex_1d            = cs->channel_mapping_texture;
+				tex_type          = GL_SHORT;
+				tex_format        = GL_RED_INTEGER;
+				tex_element_count = ARRAY_COUNT(sm->channel_mapping);
+			} break;
+			case BU_KIND_FOCAL_VECTORS: {
+				tex_1d            = cs->focal_vectors_texture;
+				tex_type          = GL_FLOAT;
+				tex_format        = GL_RG;
+				tex_element_count = ARRAY_COUNT(sm->focal_vectors);
+			} break;
+			case BU_KIND_SPARSE_ELEMENTS: {
+				tex_1d            = cs->sparse_elements_texture;
+				tex_type          = GL_SHORT;
+				tex_format        = GL_RED_INTEGER;
+				tex_element_count = ARRAY_COUNT(sm->sparse_elements);
+			} break;
+			case BU_KIND_PARAMETERS: {
+				ctx->ui_read_params = barrier_offset != 0;
+				buffer = cs->shared_ubo;
+			} break;
+			case BU_KIND_RF_DATA: {
+				if (cs->rf_raw_size != uc->size ||
+				    !uv4_equal(cs->dec_data_dim, bp->dec_data_dim))
+				{
+					alloc_shader_storage(ctx, uc->size, arena);
+				}
+				buffer = cs->raw_data_ssbo;
+			} break;
+			default: INVALID_CODE_PATH; break;
+			}
 
-			if (cs->rf_raw_size != ctx->shared_memory->raw_data_size ||
-			    !uv4_equal(cs->dec_data_dim, bp->dec_data_dim))
-			{
-				alloc_shader_storage(ctx, arena);
+			if (tex_1d) {
+				glTextureSubImage1D(tex_1d, 0, 0, tex_element_count, tex_format,
+				                    tex_type, (u8 *)sm + uc->shared_memory_offset);
 			}
 
-			glNamedBufferSubData(cs->raw_data_ssbo, 0, cs->rf_raw_size,
-			                     (u8 *)ctx->shared_memory + BEAMFORMER_RF_DATA_OFF);
-		} break;
-		case BW_UPLOAD_SPARSE_ELEMENTS: {
-			ASSERT(!atomic_load(&ctx->shared_memory->sparse_elements_sync));
-			if (!cs->sparse_elements_texture) {
-				glCreateTextures(GL_TEXTURE_1D, 1, &cs->sparse_elements_texture);
-				glTextureStorage1D(cs->sparse_elements_texture, 1, GL_R16I,
-				                   ARRAY_COUNT(sm->sparse_elements));
-				LABEL_GL_OBJECT(GL_TEXTURE, cs->sparse_elements_texture, s8("Sparse_Elements"));
+			if (buffer) {
+				glNamedBufferSubData(buffer, 0, uc->size,
+				                     (u8 *)sm + uc->shared_memory_offset);
 			}
-			glTextureSubImage1D(cs->sparse_elements_texture, 0, 0,
-			                    ARRAY_COUNT(sm->sparse_elements), GL_RED_INTEGER,
-			                    GL_SHORT, sm->sparse_elements);
 		} break;
 		case BW_COMPUTE: {
-			BeamformerParameters *bp = &ctx->shared_memory->parameters;
 			atomic_store(&cs->processing_compute, 1);
 			start_renderdoc_capture(gl_context);
 
@@ -660,8 +647,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
 			frame->frame.compound_count = bp->dec_data_dim.z;
 
 			b32 did_sum_shader = 0;
-			u32 stage_count = ctx->shared_memory->compute_stages_count;
-			ComputeShaderID *stages = ctx->shared_memory->compute_stages;
+			u32 stage_count = sm->compute_stages_count;
+			ComputeShaderID *stages = sm->compute_stages;
 			for (u32 i = 0; i < stage_count; i++) {
 				did_sum_shader |= stages[i] == CS_SUM;
 				frame->stats.timer_active[stages[i]] = 1;
@@ -721,6 +708,28 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
 	}
 }
 
+DEBUG_EXPORT BEAMFORMER_COMPUTE_SETUP_FN(beamformer_compute_setup) /* creates the parameter UBO and the three 1D lookup textures; the compute worker thread calls this before entering its work loop */
+{
+	BeamformerCtx          *ctx = (BeamformerCtx *)user_context;
+	BeamformerSharedMemory *sm  = ctx->shared_memory; /* only used for sizeof()/ARRAY_COUNT() below; no shared data is read here */
+	ComputeShaderCtx       *cs  = &ctx->csctx; /* the arena and gl_context arguments are not referenced in this body */
+
+	glCreateBuffers(1, &cs->shared_ubo);
+	glNamedBufferStorage(cs->shared_ubo, sizeof(sm->parameters), 0, GL_DYNAMIC_STORAGE_BIT); /* no initial data; contents arrive later via glNamedBufferSubData */
+
+	glCreateTextures(GL_TEXTURE_1D, 1, &cs->channel_mapping_texture);
+	glCreateTextures(GL_TEXTURE_1D, 1, &cs->sparse_elements_texture);
+	glCreateTextures(GL_TEXTURE_1D, 1, &cs->focal_vectors_texture);
+	glTextureStorage1D(cs->channel_mapping_texture, 1, GL_R16I,  ARRAY_COUNT(sm->channel_mapping)); /* single mip level, one texel per shared memory array element */
+	glTextureStorage1D(cs->sparse_elements_texture, 1, GL_R16I,  ARRAY_COUNT(sm->sparse_elements));
+	glTextureStorage1D(cs->focal_vectors_texture,   1, GL_RG32F, ARRAY_COUNT(sm->focal_vectors)); /* two floats per texel; uploaded as GL_RG/GL_FLOAT */
+
+	LABEL_GL_OBJECT(GL_TEXTURE, cs->channel_mapping_texture, s8("Channel_Mapping"));
+	LABEL_GL_OBJECT(GL_TEXTURE, cs->focal_vectors_texture,   s8("Focal_Vectors"));
+	LABEL_GL_OBJECT(GL_TEXTURE, cs->sparse_elements_texture, s8("Sparse_Elements"));
+	LABEL_GL_OBJECT(GL_BUFFER,  cs->shared_ubo,              s8("Beamformer_Parameters"));
+}
+
 DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute)
 {
 	BeamformerCtx *ctx = (BeamformerCtx *)user_context;
diff --git a/beamformer.h b/beamformer.h
@@ -61,6 +61,7 @@ typedef struct {
 } FragmentShaderCtx;
 
 #include "beamformer_parameters.h"
+#include "beamformer_work_queue.h"
 
 #define CS_UNIFORMS \
 	X(CS_MIN_MAX, mips_level)   \
@@ -69,19 +70,18 @@ typedef struct {
 typedef struct {
 	u32 programs[CS_LAST];
 
-	u32 raw_data_ssbo;
-
 	/* NOTE: Decoded data is only relevant in the context of a single frame. We use two
 	 * buffers so that they can be swapped when chaining multiple compute stages */
 	u32 rf_data_ssbos[2];
 	u32 last_output_ssbo_index;
-	u32 hadamard_texture;
 
+	u32 raw_data_ssbo;
 	u32 shared_ubo;
 
 	u32 channel_mapping_texture;
 	u32 sparse_elements_texture;
 	u32 focal_vectors_texture;
+	u32 hadamard_texture;
 
 	f32 processing_progress;
 	b32 processing_compute;
@@ -135,8 +135,6 @@ struct BeamformComputeFrame {
 	b32 ready_to_present;
 };
 
-#include "beamformer_work_queue.h"
-
 #define GL_PARAMETERS \
 	X(MAJOR_VERSION,                   version_major,                   "")      \
 	X(MINOR_VERSION,                   version_minor,                   "")      \
@@ -203,6 +201,9 @@ struct ComputeShaderReloadContext {
                                                  BeamformerInput *input)
 typedef BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step_fn);
 
+#define BEAMFORMER_COMPUTE_SETUP_FN(name) void name(iptr user_context, Arena arena, iptr gl_context)
+typedef BEAMFORMER_COMPUTE_SETUP_FN(beamformer_compute_setup_fn); /* function type of beamformer_compute_setup; same parameter list as BEAMFORMER_COMPLETE_COMPUTE_FN */
+
 #define BEAMFORMER_COMPLETE_COMPUTE_FN(name) void name(iptr user_context, Arena arena, iptr gl_context)
 typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn);
 
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -1,5 +1,11 @@
 /* See LICENSE for license details. */
 
+/* TODO(rnp):
+ * [ ]: Have a method for the library caller to take ownership of a "compute context"
+ * [ ]: Upload previously exported data for display. maybe this is a UI thing but doing it
+ *      programmatically would be nice.
+ */
+
 /* X(enumarant, number, shader file name, needs header, pretty name) */
 #define COMPUTE_SHADERS \
 	X(CUDA_DECODE,           0, "",         0, "CUDA Decoding")   \
diff --git a/beamformer_work_queue.h b/beamformer_work_queue.h
@@ -10,15 +10,24 @@ typedef enum {
 	BW_RELOAD_SHADER,
 	BW_SAVE_FRAME,
 	BW_SEND_FRAME,
-	BW_UPLOAD_CHANNEL_MAPPING,
-	BW_UPLOAD_FOCAL_VECTORS,
-	BW_UPLOAD_PARAMETERS,
-	BW_UPLOAD_PARAMETERS_HEAD,
-	BW_UPLOAD_PARAMETERS_UI,
-	BW_UPLOAD_RF_DATA,
-	BW_UPLOAD_SPARSE_ELEMENTS,
+	BW_UPLOAD_BUFFER,
 } BeamformWorkType;
 
+typedef enum { /* selects which GL object a BW_UPLOAD_BUFFER work item is written to */
+	BU_KIND_CHANNEL_MAPPING, /* -> channel_mapping_texture (GL_RED_INTEGER / GL_SHORT) */
+	BU_KIND_FOCAL_VECTORS, /* -> focal_vectors_texture (GL_RG / GL_FLOAT) */
+	BU_KIND_PARAMETERS, /* -> shared_ubo */
+	BU_KIND_RF_DATA, /* -> raw_data_ssbo; shader storage is reallocated first if the size or dec_data_dim changed */
+	BU_KIND_SPARSE_ELEMENTS, /* -> sparse_elements_texture (GL_RED_INTEGER / GL_SHORT) */
+	BU_KIND_LAST, /* NOTE(review): presumably a count/sentinel; the upload switch sends it to INVALID_CODE_PATH - confirm */
+} BeamformerUploadKind;
+
+typedef struct { /* payload of a BW_UPLOAD_BUFFER work item */
+	i32 size; /* byte count given to glNamedBufferSubData for buffer kinds; texture kinds upload the full ARRAY_COUNT instead */
+	i32 shared_memory_offset; /* byte offset of the source data from the start of BeamformerSharedMemory */
+	BeamformerUploadKind kind;
+} BeamformerUploadContext;
+
 typedef struct {
 	BeamformComputeFrame *frame;
 	iptr                  file_handle;
@@ -28,6 +37,7 @@ typedef struct {
 typedef struct {
 	union {
 		BeamformComputeFrame       *frame;
+		BeamformerUploadContext     upload_context;
 		BeamformOutputFrameContext  output_frame_ctx;
 		ComputeShaderReloadContext *reload_shader_ctx;
 		void                       *generic;
@@ -80,13 +90,10 @@ typedef struct {
 	i32 parameters_sync;
 	i32 parameters_head_sync;
 	i32 parameters_ui_sync;
-
 	i32 focal_vectors_sync;
 	i32 channel_mapping_sync;
 	i32 sparse_elements_sync;
-
 	i32 raw_data_sync;
-	u32 raw_data_size;
 
 	i32           dispatch_compute_sync;
 	ImagePlaneTag current_image_plane;
diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c
@@ -260,6 +260,28 @@ beamformer_start_compute(char *shm_name, u32 image_plane_tag)
 	return result;
 }
 
+function b32
+beamformer_upload_buffer(char *shm_name, void *data, u32 size, i32 store_offset, i32 sync_offset,
+                         BeamformerUploadKind kind, i32 timeout_ms) /* copies size bytes of data into shared memory at store_offset and queues a BW_UPLOAD_BUFFER work item; returns nonzero on success */
+{
+	b32 result = check_shared_memory(shm_name); /* NOTE(review): presumably opens/validates the g_bp mapping - confirm against its definition */
+	if (result) {
+		BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue);
+		result = work && try_wait_sync((i32 *)((u8 *)g_bp + sync_offset), timeout_ms, os_wait_on_value); /* sync_offset is a byte offset from the start of the shared memory block */
+		if (result) { /* on failure nothing is copied and the pushed slot is never committed */
+			BeamformerUploadContext *uc = &work->upload_context;
+			uc->shared_memory_offset = store_offset;
+			uc->size = size; /* u32 -> i32 conversion; the visible callers bound size before calling */
+			uc->kind = kind;
+			work->type = BW_UPLOAD_BUFFER;
+			work->completion_barrier = sync_offset; /* stored as an offset here, unlike the absolute address used in ui.c */
+			mem_copy((u8 *)g_bp + store_offset, data, size); /* the data is copied before the commit below */
+			beamform_work_queue_push_commit(&g_bp->external_work_queue);
+		}
+	}
+	return result;
+}
+
 #define BEAMFORMER_UPLOAD_FNS \
 	X(channel_mapping, i16, CHANNEL_MAPPING) \
 	X(sparse_elements, i16, SPARSE_ELEMENTS) \
@@ -267,68 +289,55 @@ beamformer_start_compute(char *shm_name, u32 image_plane_tag)
 
 #define X(name, dtype, command) \
 b32 beamformer_push_##name (char *shm_id, dtype *data, u32 count, i32 timeout_ms) { \
-	b32 result = check_shared_memory(shm_id) && count <= ARRAY_COUNT(g_bp->name);             \
-	if (result) {                                                                             \
-		BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue);        \
-		result = work && try_wait_sync(&g_bp->name##_sync, timeout_ms, os_wait_on_value); \
-		if (result) {                                                                     \
-			work->type = BW_UPLOAD_##command;                                         \
-			work->completion_barrier = offsetof(BeamformerSharedMemory, name##_sync); \
-			mem_copy(g_bp->name, data, count * sizeof(*g_bp->name));                  \
-			beamform_work_queue_push_commit(&g_bp->external_work_queue);              \
-		}                                                                                 \
-	}                                                                                         \
-	return result;                                                                            \
+	b32 result = count <= ARRAY_COUNT(g_bp->name);                                           \
+	if (result) {                                                                            \
+		result = beamformer_upload_buffer(shm_id, data, count * sizeof(dtype),           \
+		                                  offsetof(BeamformerSharedMemory, name),        \
+		                                  offsetof(BeamformerSharedMemory, name##_sync), \
+		                                  BU_KIND_##command, timeout_ms);                \
+	}                                                                                        \
+	return result;                                                                           \
 }
 BEAMFORMER_UPLOAD_FNS
 #undef X
 
 b32
-beamformer_push_data(char *shm_name, void *data, u32 data_size, i32 timeout_ms)
+beamformer_push_parameters(char *shm_name, BeamformerParameters *bp, i32 timeout_ms)
 {
-	b32 result = data_size <= BEAMFORMER_MAX_RF_DATA_SIZE && check_shared_memory(shm_name);
-	if (result) {
-		BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue);
-		result = work && try_wait_sync(&g_bp->raw_data_sync, timeout_ms, os_wait_on_value);
-		if (result) {
-			work->type = BW_UPLOAD_RF_DATA;
-			work->completion_barrier = offsetof(BeamformerSharedMemory, raw_data_sync);
-			mem_copy((u8 *)g_bp + BEAMFORMER_RF_DATA_OFF, data, data_size);
-			g_bp->raw_data_size = data_size;
-			beamform_work_queue_push_commit(&g_bp->external_work_queue);
-		}
-	}
+	b32 result = beamformer_upload_buffer(shm_name, bp, sizeof(*bp),
+	                                      offsetof(BeamformerSharedMemory, parameters),
+	                                      offsetof(BeamformerSharedMemory, parameters_sync),
+	                                      BU_KIND_PARAMETERS, timeout_ms);
 	return result;
 }
 
 b32
-beamformer_push_parameters(char *shm_name, BeamformerParameters *bp, i32 timeout_ms)
+beamformer_push_data(char *shm_name, void *data, u32 data_size, i32 timeout_ms)
 {
-	b32 result = check_shared_memory(shm_name);
+	b32 result = data_size <= BEAMFORMER_MAX_RF_DATA_SIZE;
 	if (result) {
-		BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue);
-		result = work && try_wait_sync(&g_bp->parameters_sync, timeout_ms, os_wait_on_value);
-		if (result) {
-			work->type = BW_UPLOAD_PARAMETERS;
-			work->completion_barrier = offsetof(BeamformerSharedMemory, parameters_sync);
-			mem_copy(&g_bp->parameters, bp, sizeof(g_bp->parameters));
-			beamform_work_queue_push_commit(&g_bp->external_work_queue);
-		}
+		result = beamformer_upload_buffer(shm_name, data, data_size, BEAMFORMER_RF_DATA_OFF,
+		                                  offsetof(BeamformerSharedMemory, raw_data_sync),
+		                                  BU_KIND_RF_DATA, timeout_ms);
 	}
 	return result;
 }
 
 b32
-beamformer_push_ui_parameters(char *shm_name, BeamformerUIParameters *bp, i32 timeout_ms)
+beamformer_push_parameters_ui(char *shm_name, BeamformerUIParameters *bp, i32 timeout_ms)
 {
 	b32 result = check_shared_memory(shm_name);
 	if (result) {
 		BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue);
 		result = work && try_wait_sync(&g_bp->parameters_ui_sync, timeout_ms, os_wait_on_value);
 		if (result) {
-			work->type = BW_UPLOAD_PARAMETERS_UI;
+			BeamformerUploadContext *uc = &work->upload_context;
+			uc->shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
+			uc->size = sizeof(g_bp->parameters);
+			uc->kind = BU_KIND_PARAMETERS;
+			work->type = BW_UPLOAD_BUFFER;
 			work->completion_barrier = offsetof(BeamformerSharedMemory, parameters_ui_sync);
-			mem_copy(&g_bp->parameters_ui, bp, sizeof(g_bp->parameters_ui));
+			mem_copy(&g_bp->parameters_ui, bp, sizeof(*bp));
 			beamform_work_queue_push_commit(&g_bp->external_work_queue);
 		}
 	}
@@ -343,9 +352,13 @@ beamformer_push_parameters_head(char *shm_name, BeamformerParametersHead *bp, i3
 		BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue);
 		result = work && try_wait_sync(&g_bp->parameters_head_sync, timeout_ms, os_wait_on_value);
 		if (result) {
-			work->type = BW_UPLOAD_PARAMETERS_HEAD;
+			BeamformerUploadContext *uc = &work->upload_context;
+			uc->shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
+			uc->size = sizeof(g_bp->parameters);
+			uc->kind = BU_KIND_PARAMETERS;
+			work->type = BW_UPLOAD_BUFFER;
 			work->completion_barrier = offsetof(BeamformerSharedMemory, parameters_head_sync);
-			mem_copy(&g_bp->parameters_head, bp, sizeof(g_bp->parameters_head));
+			mem_copy(&g_bp->parameters_head, bp, sizeof(*bp));
 			beamform_work_queue_push_commit(&g_bp->external_work_queue);
 		}
 	}
diff --git a/static.c b/static.c
@@ -10,6 +10,7 @@ static void *debug_lib;
 #define DEBUG_ENTRY_POINTS \
 	X(beamformer_frame_step)           \
 	X(beamformer_complete_compute)     \
+	X(beamformer_compute_setup)        \
 	X(beamform_work_queue_push)        \
 	X(beamform_work_queue_push_commit)
 
@@ -252,6 +253,8 @@ static OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point)
 	glfwMakeContextCurrent(ctx->window_handle);
 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
 
+	beamformer_compute_setup(ctx->user_context, ctx->arena, ctx->gl_context);
+
 	for (;;) {
 		for (;;) {
 			i32 current = atomic_load(&ctx->sync_variable);
@@ -336,11 +339,6 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory)
 	glEnable(GL_DEBUG_OUTPUT);
 #endif
 
-	/* NOTE: allocate space for Uniform Buffer but don't send anything yet */
-	glCreateBuffers(1, &ctx->csctx.shared_ubo);
-	glNamedBufferStorage(ctx->csctx.shared_ubo, sizeof(BeamformerParameters), 0, GL_DYNAMIC_STORAGE_BIT);
-	LABEL_GL_OBJECT(GL_BUFFER, ctx->csctx.shared_ubo, s8("Beamformer_Parameters"));
-
 	#define X(e, sn, f, nh, pretty_name) do if (s8(f).len > 0) {                 \
 		ComputeShaderReloadContext *csr = push_struct(memory, typeof(*csr)); \
 		csr->beamformer_ctx = ctx;                                           \
diff --git a/ui.c b/ui.c
@@ -2449,11 +2449,13 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw
 		validate_ui_parameters(&ui->params);
 		BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
 		if (work && try_wait_sync(&ctx->shared_memory->parameters_sync, 0, ctx->os.wait_on_value)) {
-			work->generic            = (void *)1;
-			work->type               = BW_UPLOAD_PARAMETERS;
+			BeamformerUploadContext *uc = &work->upload_context;
+			uc->shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
+			uc->size = sizeof(ctx->shared_memory->parameters);
+			uc->kind = BU_KIND_PARAMETERS;
+			work->type = BW_UPLOAD_BUFFER;
 			work->completion_barrier = (iptr)&ctx->shared_memory->parameters_sync;
-			mem_copy(&ctx->shared_memory->parameters.output_min_coordinate,
-			         &ui->params, sizeof(ui->params));
+			mem_copy(&ctx->shared_memory->parameters_ui, &ui->params, sizeof(ui->params));
 			beamform_work_queue_push_commit(ctx->beamform_work_queue);
 			ui->flush_params   = 0;
 			ctx->start_compute = 1;