ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 8408acf219120932b5db273588d2542b87954356
Parent: 2959f4674a28a6da90eeea06bba09f5f783f2650
Author: Randy Palamar
Date:   Sun,  6 Apr 2025 16:10:57 -0600

core/lib: use upload parameters command

Diffstat:
M beamformer.c | 40 ++++++++++++++++++----------------------
M beamformer_work_queue.c | 18 ++++++++++++++++++
M beamformer_work_queue.h | 6 +++---
M helpers/ogl_beamformer_lib.c | 60 ++++++++++++++++++++++++++----------------------------------
M helpers/ogl_beamformer_lib.h | 1 +
M os_linux.c | 5 ++---
M os_win32.c | 5 ++---
M static.c | 1 +
M ui.c | 18 +++++++++++-------
M util.h | 4 ++++
10 files changed, 86 insertions(+), 72 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -106,7 +106,7 @@ static void alloc_shader_storage(BeamformerCtx *ctx, Arena a) { ComputeShaderCtx *cs = &ctx->csctx; - BeamformerParameters *bp = &ctx->shared_memory->raw; + BeamformerParameters *bp = &ctx->shared_memory->parameters; uv4 dec_data_dim = bp->dec_data_dim; u32 rf_raw_size = ctx->shared_memory->raw_data_size; @@ -388,7 +388,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame, ASSERT(frame >= ctx->beamform_frames); ASSERT(frame < ctx->beamform_frames + ARRAY_COUNT(ctx->beamform_frames)); u32 base_index = (u32)(frame - ctx->beamform_frames); - u32 to_average = ctx->shared_memory->raw.output_points.w; + u32 to_average = ctx->shared_memory->parameters.output_points.w; u32 frame_count = 0; u32 *in_textures = alloc(&arena, u32, MAX_BEAMFORMED_SAVED_FRAMES); ComputeFrameIterator cfi = compute_frame_iterator(ctx, 1 + base_index - to_average, @@ -552,7 +552,7 @@ static void complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_context, iz barrier_offset) { ComputeShaderCtx *cs = &ctx->csctx; - BeamformerParameters *bp = &ctx->shared_memory->raw; + BeamformerParameters *bp = &ctx->shared_memory->parameters; BeamformerSharedMemory *sm = ctx->shared_memory; BeamformWork *work = beamform_work_queue_pop(q); @@ -607,6 +607,12 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co ARRAY_COUNT(sm->focal_vectors), GL_RG, GL_FLOAT, sm->focal_vectors); } break; + case BW_UPLOAD_PARAMETERS: { + ASSERT(!atomic_load(&ctx->shared_memory->parameters_sync)); + glNamedBufferSubData(cs->shared_ubo, 0, sizeof(ctx->shared_memory->parameters), + &ctx->shared_memory->parameters); + ctx->ui_read_params = !work->generic; + } break; case BW_UPLOAD_RF_DATA: { ASSERT(!atomic_load(&ctx->shared_memory->raw_data_sync)); @@ -641,25 +647,20 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co GL_SHORT, sm->sparse_elements); } break; 
case BW_COMPUTE: { + BeamformerParameters *bp = &ctx->shared_memory->parameters; atomic_store(&cs->processing_compute, 1); start_renderdoc_capture(gl_context); BeamformComputeFrame *frame = work->frame; - if (ctx->shared_memory->upload) { - glNamedBufferSubData(cs->shared_ubo, 0, sizeof(ctx->shared_memory->raw), - &ctx->shared_memory->raw); - ctx->shared_memory->upload = 0; - } - if (cs->programs[CS_DAS]) glProgramUniform1ui(cs->programs[CS_DAS], cs->cycle_t_id, cycle_t++); - uv3 try_dim = make_valid_test_dim(ctx->shared_memory->raw.output_points.xyz); + uv3 try_dim = make_valid_test_dim(bp->output_points.xyz); if (!uv3_equal(try_dim, frame->frame.dim)) alloc_beamform_frame(&ctx->gl, &frame->frame, &frame->stats, try_dim, s8("Beamformed_Data"), arena); - if (ctx->shared_memory->raw.output_points.w > 1) { + if (bp->output_points.w > 1) { if (!uv3_equal(try_dim, ctx->averaged_frames[0].frame.dim)) { alloc_beamform_frame(&ctx->gl, &ctx->averaged_frames[0].frame, &ctx->averaged_frames[0].stats, @@ -671,10 +672,10 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co } frame->in_flight = 1; - frame->frame.min_coordinate = ctx->shared_memory->raw.output_min_coordinate; - frame->frame.max_coordinate = ctx->shared_memory->raw.output_max_coordinate; - frame->frame.das_shader_id = ctx->shared_memory->raw.das_shader_id; - frame->frame.compound_count = ctx->shared_memory->raw.dec_data_dim.z; + frame->frame.min_coordinate = bp->output_min_coordinate; + frame->frame.max_coordinate = bp->output_max_coordinate; + frame->frame.das_shader_id = bp->das_shader_id; + frame->frame.compound_count = bp->dec_data_dim.z; b32 did_sum_shader = 0; u32 stage_count = ctx->shared_memory->compute_stages_count; @@ -761,7 +762,7 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) DEBUG_DECL(end_frame_capture = ctx->os.end_frame_capture); } - BeamformerParameters *bp = &ctx->shared_memory->raw; + BeamformerParameters *bp = &ctx->shared_memory->parameters; if 
(ctx->shared_memory->start_compute) { ctx->shared_memory->start_compute = 0; BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); @@ -776,7 +777,7 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) iptr f = ctx->os.open_for_write(ctx->shared_memory->export_pipe_name); export->type = BW_SAVE_FRAME; export->output_frame_ctx.file_handle = f; - if (ctx->shared_memory->raw.output_points.w > 1) { + if (bp->output_points.w > 1) { u32 a_index = !(ctx->averaged_frame_index % ARRAY_COUNT(ctx->averaged_frames)); BeamformComputeFrame *aframe = ctx->averaged_frames + a_index; @@ -789,11 +790,6 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) ctx->shared_memory->export_next_frame = 0; } - if (ctx->shared_memory->upload) { - /* TODO(rnp): clean this up */ - ctx->ui_read_params = 1; - } - ctx->os.wake_waiters(&ctx->os.compute_worker.sync_variable); } } diff --git a/beamformer_work_queue.c b/beamformer_work_queue.c @@ -50,3 +50,21 @@ DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit) { atomic_add(&q->queue, 1); } + +static b32 +try_wait_sync(i32 *sync, i32 timeout_ms, os_wait_on_value_fn *os_wait_on_value) +{ + b32 result = 0; + for (;;) { + i32 current = atomic_load(sync); + if (current) { + atomic_inc(sync, -current); + result = 1; + break; + } else if (!timeout_ms) { + break; + } + os_wait_on_value(sync, 0, timeout_ms); + } + return result; +} diff --git a/beamformer_work_queue.h b/beamformer_work_queue.h @@ -12,7 +12,7 @@ typedef enum { BW_SEND_FRAME, BW_UPLOAD_CHANNEL_MAPPING, BW_UPLOAD_FOCAL_VECTORS, - BW_UPLOAD_PARAMS, + BW_UPLOAD_PARAMETERS, BW_UPLOAD_RF_DATA, BW_UPLOAD_SPARSE_ELEMENTS, } BeamformWorkType; @@ -57,7 +57,7 @@ typedef BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit_fn); #define BEAMFORMER_MAX_RF_DATA_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - BEAMFORMER_RF_DATA_OFF) typedef struct { - BeamformerParameters raw; + BeamformerParameters parameters; ComputeShaderID 
compute_stages[16]; u32 compute_stages_count; @@ -66,10 +66,10 @@ typedef struct { u32 raw_data_size; /* TODO(rnp): these shouldn't be needed */ - b32 upload; b32 start_compute; b32 export_next_frame; + i32 parameters_sync; i32 channel_mapping_sync; i32 sparse_elements_sync; i32 focal_vectors_sync; diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c @@ -71,11 +71,10 @@ void mexWarnMsgIdAndTxt(const c8*, c8*, ...); #endif #if defined(__linux__) -static void -os_wait_on_value(i32 *value, i32 current, u32 timeout_ms) +static OS_WAIT_ON_VALUE_FN(os_wait_on_value) { struct timespec *timeout = 0, timeout_value; - if (timeout_ms != U32_MAX) { + if (timeout_ms != -1) { timeout_value.tv_sec = timeout_ms / 1000; timeout_value.tv_nsec = (timeout_ms % 1000) * 1000000; timeout = &timeout_value; @@ -134,11 +133,10 @@ os_open_shared_memory_area(char *name) #elif defined(_WIN32) -static void -os_wait_on_value(i32 *value, i32 current, u32 timeout_ms) +static OS_WAIT_ON_VALUE_FN(os_wait_on_value) { i64 *timeout = 0, timeout_value; - if (timeout_ms != U32_MAX) { + if (timeout_ms != -1) { /* TODO(rnp): not sure about this one, but this is how wine converts the ms */ timeout_value = -(i64)timeout_ms * 10000; timeout = &timeout_value; @@ -211,24 +209,6 @@ os_open_shared_memory_area(char *name) #endif static b32 -try_wait_sync(i32 *sync, i32 timeout_ms) -{ - b32 result = 0; - for (;;) { - i32 current = atomic_load(sync); - if (current) { - atomic_inc(sync, -current); - result = 1; - break; - } else if (!timeout_ms) { - break; - } - os_wait_on_value(sync, 0, timeout_ms); - } - return result; -} - -static b32 check_shared_memory(char *name) { if (!g_bp) { @@ -280,7 +260,7 @@ b32 beamformer_push_##name (char *shm_id, dtype *data, u32 count, i32 timeout_ms b32 result = check_shared_memory(shm_id) && count <= ARRAY_COUNT(g_bp->name); \ if (result) { \ BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue); \ - result = work && 
try_wait_sync(&g_bp->name##_sync, timeout_ms); \ + result = work && try_wait_sync(&g_bp->name##_sync, timeout_ms, os_wait_on_value); \ if (result) { \ work->type = BW_UPLOAD_##command; \ work->completion_barrier = offsetof(BeamformerSharedMemory, name##_sync); \ @@ -294,6 +274,23 @@ BEAMFORMER_UPLOAD_FNS #undef X b32 +beamformer_push_parameters(char *shm_name, BeamformerParameters *bp, i32 timeout_ms) +{ + b32 result = check_shared_memory(shm_name); + if (result) { + BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue); + result = work && try_wait_sync(&g_bp->parameters_sync, timeout_ms, os_wait_on_value); + if (result) { + work->type = BW_UPLOAD_PARAMETERS; + work->completion_barrier = offsetof(BeamformerSharedMemory, parameters_sync); + mem_copy(&g_bp->parameters, bp, sizeof(g_bp->parameters)); + beamform_work_queue_push_commit(&g_bp->external_work_queue); + } + } + return result; +} + +b32 set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *new_bp) { b32 result = 0; @@ -305,12 +302,7 @@ set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *new_bp) for (u32 i = 0; i < ARRAY_COUNT(focal_vectors); i++) focal_vectors[i] = (v2){{new_bp->transmit_angles[i], new_bp->focal_depths[i]}}; result |= beamformer_push_focal_vectors(shm_name, (f32 *)focal_vectors, ARRAY_COUNT(focal_vectors), 0); - - if (result) { - mem_copy(&g_bp->raw, &new_bp->xdc_transform, sizeof(g_bp->raw)); - g_bp->upload = 1; - } - + result |= beamformer_push_parameters(shm_name, (BeamformerParameters *)&new_bp->xdc_transform, 0); return result; } @@ -355,9 +347,9 @@ beamform_data_synchronized(char *pipe_name, char *shm_name, void *data, u32 data if (output_points.z == 0) output_points.z = 1; output_points.w = 1; - g_bp->raw.output_points.x = output_points.x; - g_bp->raw.output_points.y = output_points.y; - g_bp->raw.output_points.z = output_points.z; + g_bp->parameters.output_points.x = output_points.x; + g_bp->parameters.output_points.y = output_points.y; + 
g_bp->parameters.output_points.z = output_points.z; g_bp->export_next_frame = 1; s8 export_name = s8(OS_EXPORT_PIPE_NAME); diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h @@ -36,3 +36,4 @@ LIB_FN b32 beamform_data_synchronized(char *pipe_name, char *shm_name, void *dat LIB_FN b32 beamformer_push_channel_mapping(char *shm_name, i16 *mapping, u32 count, i32 timeout_ms); LIB_FN b32 beamformer_push_sparse_elements(char *shm_name, i16 *elements, u32 count, i32 timeout_ms); LIB_FN b32 beamformer_push_focal_vectors(char *shm_name, f32 *vectors, u32 count, i32 timeout_ms); +LIB_FN b32 beamformer_push_parameters(char *shm_name, BeamformerParameters *bp, i32 timeout_ms); diff --git a/os_linux.c b/os_linux.c @@ -252,11 +252,10 @@ os_create_thread(Arena arena, iptr user_context, s8 name, os_thread_entry_point_ return (iptr)result; } -static void -os_wait_on_value(i32 *value, i32 current, u32 timeout_ms) +static OS_WAIT_ON_VALUE_FN(os_wait_on_value) { struct timespec *timeout = 0, timeout_value; - if (timeout_ms != U32_MAX) { + if (timeout_ms != -1) { timeout_value.tv_sec = timeout_ms / 1000; timeout_value.tv_nsec = (timeout_ms % 1000) * 1000000; timeout = &timeout_value; diff --git a/os_win32.c b/os_win32.c @@ -319,11 +319,10 @@ os_create_thread(Arena arena, iptr user_context, s8 name, os_thread_entry_point_ return result; } -static void -os_wait_on_value(i32 *value, i32 current, u32 timeout_ms) +static OS_WAIT_ON_VALUE_FN(os_wait_on_value) { i64 *timeout = 0, timeout_value; - if (timeout_ms != U32_MAX) { + if (timeout_ms != -1) { /* TODO(rnp): not sure about this one, but this is how wine converts the ms */ timeout_value = -(i64)timeout_ms * 10000; timeout = &timeout_value; diff --git a/static.c b/static.c @@ -306,6 +306,7 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory) if (!ctx->shared_memory) os_fatal(s8("Get more ram lol\n")); /* TODO(rnp): refactor - this is annoying */ + ctx->shared_memory->parameters_sync = 1; 
ctx->shared_memory->raw_data_sync = 1; ctx->shared_memory->channel_mapping_sync = 1; ctx->shared_memory->sparse_elements_sync = 1; diff --git a/ui.c b/ui.c @@ -2380,7 +2380,7 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw /* TODO(rnp): there should be a better way of detecting this */ if (ctx->ui_read_params) { - mem_copy(&ui->params, &ctx->shared_memory->raw.output_min_coordinate, sizeof(ui->params)); + mem_copy(&ui->params, &ctx->shared_memory->parameters.output_min_coordinate, sizeof(ui->params)); ui->flush_params = 0; ctx->ui_read_params = 0; } @@ -2391,12 +2391,16 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw if (ui->flush_params) { validate_ui_parameters(&ui->params); - if (!ctx->csctx.processing_compute) { - mem_copy(&ctx->shared_memory->raw.output_min_coordinate, &ui->params, - sizeof(ui->params)); - ui->flush_params = 0; - ctx->shared_memory->upload = 1; - ctx->start_compute = 1; + BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); + if (work && try_wait_sync(&ctx->shared_memory->parameters_sync, 0, ctx->os.wait_on_value)) { + work->generic = (void *)1; + work->type = BW_UPLOAD_PARAMETERS; + work->completion_barrier = (iptr)&ctx->shared_memory->parameters_sync; + mem_copy(&ctx->shared_memory->parameters.output_min_coordinate, + &ui->params, sizeof(ui->params)); + beamform_work_queue_push_commit(ctx->beamform_work_queue); + ui->flush_params = 0; + ctx->start_compute = 1; } } diff --git a/util.h b/util.h @@ -252,6 +252,9 @@ typedef OS_READ_WHOLE_FILE_FN(os_read_whole_file_fn); #define OS_READ_FILE_FN(name) iz name(iptr file, void *buf, iz size) typedef OS_READ_FILE_FN(os_read_file_fn); +#define OS_WAIT_ON_VALUE_FN(name) void name(i32 *value, i32 current, i32 timeout_ms) +typedef OS_WAIT_ON_VALUE_FN(os_wait_on_value_fn); + #define OS_WAKE_WAITERS_FN(name) void name(i32 *sync) typedef OS_WAKE_WAITERS_FN(os_wake_waiters_fn); @@ -271,6 +274,7 @@ typedef 
OS_THREAD_ENTRY_POINT_FN(os_thread_entry_point_fn); X(open_for_write) \ X(read_file) \ X(read_whole_file) \ + X(wait_on_value) \ X(wake_waiters) \ X(write_new_file) \ X(write_file)