Commit: 8408acf219120932b5db273588d2542b87954356
Parent: 2959f4674a28a6da90eeea06bba09f5f783f2650
Author: Randy Palamar
Date: Sun, 6 Apr 2025 16:10:57 -0600
core/lib: use upload parameters command
Diffstat:
10 files changed, 86 insertions(+), 72 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -106,7 +106,7 @@ static void
alloc_shader_storage(BeamformerCtx *ctx, Arena a)
{
ComputeShaderCtx *cs = &ctx->csctx;
- BeamformerParameters *bp = &ctx->shared_memory->raw;
+ BeamformerParameters *bp = &ctx->shared_memory->parameters;
uv4 dec_data_dim = bp->dec_data_dim;
u32 rf_raw_size = ctx->shared_memory->raw_data_size;
@@ -388,7 +388,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame,
ASSERT(frame >= ctx->beamform_frames);
ASSERT(frame < ctx->beamform_frames + ARRAY_COUNT(ctx->beamform_frames));
u32 base_index = (u32)(frame - ctx->beamform_frames);
- u32 to_average = ctx->shared_memory->raw.output_points.w;
+ u32 to_average = ctx->shared_memory->parameters.output_points.w;
u32 frame_count = 0;
u32 *in_textures = alloc(&arena, u32, MAX_BEAMFORMED_SAVED_FRAMES);
ComputeFrameIterator cfi = compute_frame_iterator(ctx, 1 + base_index - to_average,
@@ -552,7 +552,7 @@ static void
complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_context, iz barrier_offset)
{
ComputeShaderCtx *cs = &ctx->csctx;
- BeamformerParameters *bp = &ctx->shared_memory->raw;
+ BeamformerParameters *bp = &ctx->shared_memory->parameters;
BeamformerSharedMemory *sm = ctx->shared_memory;
BeamformWork *work = beamform_work_queue_pop(q);
@@ -607,6 +607,12 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
ARRAY_COUNT(sm->focal_vectors), GL_RG,
GL_FLOAT, sm->focal_vectors);
} break;
+ case BW_UPLOAD_PARAMETERS: {
+ ASSERT(!atomic_load(&ctx->shared_memory->parameters_sync));
+ glNamedBufferSubData(cs->shared_ubo, 0, sizeof(ctx->shared_memory->parameters),
+ &ctx->shared_memory->parameters);
+ ctx->ui_read_params = !work->generic;
+ } break;
case BW_UPLOAD_RF_DATA: {
ASSERT(!atomic_load(&ctx->shared_memory->raw_data_sync));
@@ -641,25 +647,20 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
GL_SHORT, sm->sparse_elements);
} break;
case BW_COMPUTE: {
+ BeamformerParameters *bp = &ctx->shared_memory->parameters;
atomic_store(&cs->processing_compute, 1);
start_renderdoc_capture(gl_context);
BeamformComputeFrame *frame = work->frame;
- if (ctx->shared_memory->upload) {
- glNamedBufferSubData(cs->shared_ubo, 0, sizeof(ctx->shared_memory->raw),
- &ctx->shared_memory->raw);
- ctx->shared_memory->upload = 0;
- }
-
if (cs->programs[CS_DAS])
glProgramUniform1ui(cs->programs[CS_DAS], cs->cycle_t_id, cycle_t++);
- uv3 try_dim = make_valid_test_dim(ctx->shared_memory->raw.output_points.xyz);
+ uv3 try_dim = make_valid_test_dim(bp->output_points.xyz);
if (!uv3_equal(try_dim, frame->frame.dim))
alloc_beamform_frame(&ctx->gl, &frame->frame, &frame->stats, try_dim,
s8("Beamformed_Data"), arena);
- if (ctx->shared_memory->raw.output_points.w > 1) {
+ if (bp->output_points.w > 1) {
if (!uv3_equal(try_dim, ctx->averaged_frames[0].frame.dim)) {
alloc_beamform_frame(&ctx->gl, &ctx->averaged_frames[0].frame,
&ctx->averaged_frames[0].stats,
@@ -671,10 +672,10 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
}
frame->in_flight = 1;
- frame->frame.min_coordinate = ctx->shared_memory->raw.output_min_coordinate;
- frame->frame.max_coordinate = ctx->shared_memory->raw.output_max_coordinate;
- frame->frame.das_shader_id = ctx->shared_memory->raw.das_shader_id;
- frame->frame.compound_count = ctx->shared_memory->raw.dec_data_dim.z;
+ frame->frame.min_coordinate = bp->output_min_coordinate;
+ frame->frame.max_coordinate = bp->output_max_coordinate;
+ frame->frame.das_shader_id = bp->das_shader_id;
+ frame->frame.compound_count = bp->dec_data_dim.z;
b32 did_sum_shader = 0;
u32 stage_count = ctx->shared_memory->compute_stages_count;
@@ -761,7 +762,7 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
DEBUG_DECL(end_frame_capture = ctx->os.end_frame_capture);
}
- BeamformerParameters *bp = &ctx->shared_memory->raw;
+ BeamformerParameters *bp = &ctx->shared_memory->parameters;
if (ctx->shared_memory->start_compute) {
ctx->shared_memory->start_compute = 0;
BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
@@ -776,7 +777,7 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
iptr f = ctx->os.open_for_write(ctx->shared_memory->export_pipe_name);
export->type = BW_SAVE_FRAME;
export->output_frame_ctx.file_handle = f;
- if (ctx->shared_memory->raw.output_points.w > 1) {
+ if (bp->output_points.w > 1) {
u32 a_index = !(ctx->averaged_frame_index %
ARRAY_COUNT(ctx->averaged_frames));
BeamformComputeFrame *aframe = ctx->averaged_frames + a_index;
@@ -789,11 +790,6 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
ctx->shared_memory->export_next_frame = 0;
}
- if (ctx->shared_memory->upload) {
- /* TODO(rnp): clean this up */
- ctx->ui_read_params = 1;
- }
-
ctx->os.wake_waiters(&ctx->os.compute_worker.sync_variable);
}
}
diff --git a/beamformer_work_queue.c b/beamformer_work_queue.c
@@ -50,3 +50,21 @@ DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit)
{
atomic_add(&q->queue, 1);
}
+
+/* Try to claim the sync value stored at `sync`.
+ *
+ * When the value is non-zero the observed amount is subtracted back out of it and
+ * 1 is returned. While the value is zero the behaviour depends on `timeout_ms`:
+ *   timeout_ms == 0  give up immediately and return 0 (pure poll)
+ *   timeout_ms  > 0  block in os_wait_on_value() at most once, re-check, then give up
+ *   timeout_ms  < 0  keep waiting until the value becomes non-zero
+ *                    (the OS wait functions leave their timeout unset for -1)
+ *
+ * `os_wait_on_value` is passed in so that both the beamformer and the helper library
+ * can supply their own implementation.
+ */
+static b32
+try_wait_sync(i32 *sync, i32 timeout_ms, os_wait_on_value_fn *os_wait_on_value)
+{
+	b32 result = 0;
+	b32 waited = 0;
+	for (;;) {
+		i32 current = atomic_load(sync);
+		if (current) {
+			/* NOTE(review): the load and the add are two separate atomic operations;
+			 * confirm that only one party ever claims a given sync value */
+			atomic_inc(sync, -current);
+			result = 1;
+			break;
+		}
+		/* os_wait_on_value() returns void, so a timeout cannot be told apart from a
+		 * wake up. Without this bound a positive timeout would re-arm the wait
+		 * forever and the function could never report failure. */
+		if (!timeout_ms || (waited && timeout_ms > 0))
+			break;
+		os_wait_on_value(sync, 0, timeout_ms);
+		waited = 1;
+	}
+	return result;
+}
diff --git a/beamformer_work_queue.h b/beamformer_work_queue.h
@@ -12,7 +12,7 @@ typedef enum {
BW_SEND_FRAME,
BW_UPLOAD_CHANNEL_MAPPING,
BW_UPLOAD_FOCAL_VECTORS,
- BW_UPLOAD_PARAMS,
+ BW_UPLOAD_PARAMETERS,
BW_UPLOAD_RF_DATA,
BW_UPLOAD_SPARSE_ELEMENTS,
} BeamformWorkType;
@@ -57,7 +57,7 @@ typedef BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit_fn);
#define BEAMFORMER_MAX_RF_DATA_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - BEAMFORMER_RF_DATA_OFF)
typedef struct {
- BeamformerParameters raw;
+ BeamformerParameters parameters;
ComputeShaderID compute_stages[16];
u32 compute_stages_count;
@@ -66,10 +66,10 @@ typedef struct {
u32 raw_data_size;
/* TODO(rnp): these shouldn't be needed */
- b32 upload;
b32 start_compute;
b32 export_next_frame;
+ i32 parameters_sync;
i32 channel_mapping_sync;
i32 sparse_elements_sync;
i32 focal_vectors_sync;
diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c
@@ -71,11 +71,10 @@ void mexWarnMsgIdAndTxt(const c8*, c8*, ...);
#endif
#if defined(__linux__)
-static void
-os_wait_on_value(i32 *value, i32 current, u32 timeout_ms)
+static OS_WAIT_ON_VALUE_FN(os_wait_on_value)
{
struct timespec *timeout = 0, timeout_value;
- if (timeout_ms != U32_MAX) {
+ if (timeout_ms != -1) {
timeout_value.tv_sec = timeout_ms / 1000;
timeout_value.tv_nsec = (timeout_ms % 1000) * 1000000;
timeout = &timeout_value;
@@ -134,11 +133,10 @@ os_open_shared_memory_area(char *name)
#elif defined(_WIN32)
-static void
-os_wait_on_value(i32 *value, i32 current, u32 timeout_ms)
+static OS_WAIT_ON_VALUE_FN(os_wait_on_value)
{
i64 *timeout = 0, timeout_value;
- if (timeout_ms != U32_MAX) {
+ if (timeout_ms != -1) {
/* TODO(rnp): not sure about this one, but this is how wine converts the ms */
timeout_value = -(i64)timeout_ms * 10000;
timeout = &timeout_value;
@@ -211,24 +209,6 @@ os_open_shared_memory_area(char *name)
#endif
static b32
-try_wait_sync(i32 *sync, i32 timeout_ms)
-{
- b32 result = 0;
- for (;;) {
- i32 current = atomic_load(sync);
- if (current) {
- atomic_inc(sync, -current);
- result = 1;
- break;
- } else if (!timeout_ms) {
- break;
- }
- os_wait_on_value(sync, 0, timeout_ms);
- }
- return result;
-}
-
-static b32
check_shared_memory(char *name)
{
if (!g_bp) {
@@ -280,7 +260,7 @@ b32 beamformer_push_##name (char *shm_id, dtype *data, u32 count, i32 timeout_ms
b32 result = check_shared_memory(shm_id) && count <= ARRAY_COUNT(g_bp->name); \
if (result) { \
BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue); \
- result = work && try_wait_sync(&g_bp->name##_sync, timeout_ms); \
+ result = work && try_wait_sync(&g_bp->name##_sync, timeout_ms, os_wait_on_value); \
if (result) { \
work->type = BW_UPLOAD_##command; \
work->completion_barrier = offsetof(BeamformerSharedMemory, name##_sync); \
@@ -294,6 +274,23 @@ BEAMFORMER_UPLOAD_FNS
#undef X
+/* Queue an upload of `*bp` to the beamformer through shared memory.
+ *
+ * Returns 1 once the parameters have been copied into the shared memory region and
+ * the work item has been committed to the external work queue. Returns 0 when the
+ * shared memory check fails, when no queue slot is available, or when
+ * parameters_sync could not be claimed (see try_wait_sync() for `timeout_ms`).
+ */
b32
+beamformer_push_parameters(char *shm_name, BeamformerParameters *bp, i32 timeout_ms)
+{
+	if (!check_shared_memory(shm_name))
+		return 0;
+
+	/* reserve a slot first; it is only committed after the sync value is claimed */
+	BeamformWork *slot = beamform_work_queue_push(&g_bp->external_work_queue);
+	if (!slot || !try_wait_sync(&g_bp->parameters_sync, timeout_ms, os_wait_on_value))
+		return 0;
+
+	slot->type               = BW_UPLOAD_PARAMETERS;
+	/* the barrier is stored as an offset into BeamformerSharedMemory, not a pointer */
+	slot->completion_barrier = offsetof(BeamformerSharedMemory, parameters_sync);
+	mem_copy(&g_bp->parameters, bp, sizeof(g_bp->parameters));
+	beamform_work_queue_push_commit(&g_bp->external_work_queue);
+	return 1;
+}
+
+b32
set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *new_bp)
{
b32 result = 0;
@@ -305,12 +302,7 @@ set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *new_bp)
for (u32 i = 0; i < ARRAY_COUNT(focal_vectors); i++)
focal_vectors[i] = (v2){{new_bp->transmit_angles[i], new_bp->focal_depths[i]}};
result |= beamformer_push_focal_vectors(shm_name, (f32 *)focal_vectors, ARRAY_COUNT(focal_vectors), 0);
-
- if (result) {
- mem_copy(&g_bp->raw, &new_bp->xdc_transform, sizeof(g_bp->raw));
- g_bp->upload = 1;
- }
-
+ result |= beamformer_push_parameters(shm_name, (BeamformerParameters *)&new_bp->xdc_transform, 0);
return result;
}
@@ -355,9 +347,9 @@ beamform_data_synchronized(char *pipe_name, char *shm_name, void *data, u32 data
if (output_points.z == 0) output_points.z = 1;
output_points.w = 1;
- g_bp->raw.output_points.x = output_points.x;
- g_bp->raw.output_points.y = output_points.y;
- g_bp->raw.output_points.z = output_points.z;
+ g_bp->parameters.output_points.x = output_points.x;
+ g_bp->parameters.output_points.y = output_points.y;
+ g_bp->parameters.output_points.z = output_points.z;
g_bp->export_next_frame = 1;
s8 export_name = s8(OS_EXPORT_PIPE_NAME);
diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h
@@ -36,3 +36,4 @@ LIB_FN b32 beamform_data_synchronized(char *pipe_name, char *shm_name, void *dat
LIB_FN b32 beamformer_push_channel_mapping(char *shm_name, i16 *mapping, u32 count, i32 timeout_ms);
LIB_FN b32 beamformer_push_sparse_elements(char *shm_name, i16 *elements, u32 count, i32 timeout_ms);
LIB_FN b32 beamformer_push_focal_vectors(char *shm_name, f32 *vectors, u32 count, i32 timeout_ms);
+LIB_FN b32 beamformer_push_parameters(char *shm_name, BeamformerParameters *bp, i32 timeout_ms);
diff --git a/os_linux.c b/os_linux.c
@@ -252,11 +252,10 @@ os_create_thread(Arena arena, iptr user_context, s8 name, os_thread_entry_point_
return (iptr)result;
}
-static void
-os_wait_on_value(i32 *value, i32 current, u32 timeout_ms)
+static OS_WAIT_ON_VALUE_FN(os_wait_on_value)
{
struct timespec *timeout = 0, timeout_value;
- if (timeout_ms != U32_MAX) {
+ if (timeout_ms != -1) {
timeout_value.tv_sec = timeout_ms / 1000;
timeout_value.tv_nsec = (timeout_ms % 1000) * 1000000;
timeout = &timeout_value;
diff --git a/os_win32.c b/os_win32.c
@@ -319,11 +319,10 @@ os_create_thread(Arena arena, iptr user_context, s8 name, os_thread_entry_point_
return result;
}
-static void
-os_wait_on_value(i32 *value, i32 current, u32 timeout_ms)
+static OS_WAIT_ON_VALUE_FN(os_wait_on_value)
{
i64 *timeout = 0, timeout_value;
- if (timeout_ms != U32_MAX) {
+ if (timeout_ms != -1) {
/* TODO(rnp): not sure about this one, but this is how wine converts the ms */
timeout_value = -(i64)timeout_ms * 10000;
timeout = &timeout_value;
diff --git a/static.c b/static.c
@@ -306,6 +306,7 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory)
if (!ctx->shared_memory)
os_fatal(s8("Get more ram lol\n"));
/* TODO(rnp): refactor - this is annoying */
+ ctx->shared_memory->parameters_sync = 1;
ctx->shared_memory->raw_data_sync = 1;
ctx->shared_memory->channel_mapping_sync = 1;
ctx->shared_memory->sparse_elements_sync = 1;
diff --git a/ui.c b/ui.c
@@ -2380,7 +2380,7 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw
/* TODO(rnp): there should be a better way of detecting this */
if (ctx->ui_read_params) {
- mem_copy(&ui->params, &ctx->shared_memory->raw.output_min_coordinate, sizeof(ui->params));
+ mem_copy(&ui->params, &ctx->shared_memory->parameters.output_min_coordinate, sizeof(ui->params));
ui->flush_params = 0;
ctx->ui_read_params = 0;
}
@@ -2391,12 +2391,16 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw
if (ui->flush_params) {
validate_ui_parameters(&ui->params);
- if (!ctx->csctx.processing_compute) {
- mem_copy(&ctx->shared_memory->raw.output_min_coordinate, &ui->params,
- sizeof(ui->params));
- ui->flush_params = 0;
- ctx->shared_memory->upload = 1;
- ctx->start_compute = 1;
+ BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
+ if (work && try_wait_sync(&ctx->shared_memory->parameters_sync, 0, ctx->os.wait_on_value)) {
+ work->generic = (void *)1;
+ work->type = BW_UPLOAD_PARAMETERS;
+ work->completion_barrier = (iptr)&ctx->shared_memory->parameters_sync;
+ mem_copy(&ctx->shared_memory->parameters.output_min_coordinate,
+ &ui->params, sizeof(ui->params));
+ beamform_work_queue_push_commit(ctx->beamform_work_queue);
+ ui->flush_params = 0;
+ ctx->start_compute = 1;
}
}
diff --git a/util.h b/util.h
@@ -252,6 +252,9 @@ typedef OS_READ_WHOLE_FILE_FN(os_read_whole_file_fn);
#define OS_READ_FILE_FN(name) iz name(iptr file, void *buf, iz size)
typedef OS_READ_FILE_FN(os_read_file_fn);
+#define OS_WAIT_ON_VALUE_FN(name) void name(i32 *value, i32 current, i32 timeout_ms) /* common signature of the per-OS os_wait_on_value() implementations; they only set up an OS timeout when timeout_ms != -1 */
+typedef OS_WAIT_ON_VALUE_FN(os_wait_on_value_fn); /* function type; callers hold it as `os_wait_on_value_fn *` (e.g. the last parameter of try_wait_sync()) */
+
#define OS_WAKE_WAITERS_FN(name) void name(i32 *sync)
typedef OS_WAKE_WAITERS_FN(os_wake_waiters_fn);
@@ -271,6 +274,7 @@ typedef OS_THREAD_ENTRY_POINT_FN(os_thread_entry_point_fn);
X(open_for_write) \
X(read_file) \
X(read_whole_file) \
+ X(wait_on_value) \
X(wake_waiters) \
X(write_new_file) \
X(write_file)