ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: eced14ae419e51674f65e0346afb8babeea31d70
Parent: 2d3df15f1c6f5e3f2ecbf61de94bfc4718d1c4ac
Author: Randy Palamar
Date:   Sat,  9 Aug 2025 07:59:01 -0600

core: make cuda library function pointers into globals

Equivalent to how OpenGL function pointers are accessed, this is
more efficient (RIP-relative addressing) and removes useless
context storage.

Diffstat:
M beamformer.c | 22 +++++++++++-----------
M beamformer.h | 12 ++++--------
M build.c | 5 +----
M static.c | 32 ++++++++++++++++----------------
4 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -254,8 +254,8 @@ alloc_shader_storage(BeamformerCtx *ctx, u32 decoded_data_size, Arena arena) * need to do. For now grab out of parameter block 0 but it is not correct */ BeamformerParameterBlock *pb = beamformer_parameter_block(ctx->shared_memory.region, 0); /* NOTE(rnp): these are stubs when CUDA isn't supported */ - cc->cuda_lib.register_buffers(cc->ping_pong_ssbos, countof(cc->ping_pong_ssbos), cc->rf_buffer.ssbo); - cc->cuda_lib.init(pb->parameters.rf_raw_dim, pb->parameters.dec_data_dim); + cuda_register_buffers(cc->ping_pong_ssbos, countof(cc->ping_pong_ssbos), cc->rf_buffer.ssbo); + cuda_init(pb->parameters.rf_raw_dim, pb->parameters.dec_data_dim); } function void @@ -376,18 +376,18 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) BeamformerParameters *bp = &cp->das_ubo_data; b32 decode_first = pb->pipeline.shaders[0] == BeamformerShaderKind_Decode; - b32 cuda_hilbert = 0; - b32 demodulate = 0; + b32 run_cuda_hilbert = 0; + b32 demodulate = 0; for (u32 i = 0; i < pb->pipeline.shader_count; i++) { switch (pb->pipeline.shaders[i]) { - case BeamformerShaderKind_CudaHilbert:{ cuda_hilbert = 1; }break; - case BeamformerShaderKind_Demodulate:{ demodulate = 1; }break; + case BeamformerShaderKind_CudaHilbert:{ run_cuda_hilbert = 1; }break; + case BeamformerShaderKind_Demodulate:{ demodulate = 1; }break; default:{}break; } } - if (demodulate) cuda_hilbert = 0; + if (demodulate) run_cuda_hilbert = 0; mem_copy(bp, &pb->parameters, sizeof(*bp)); @@ -399,7 +399,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) b32 commit = 0; switch (shader) { - case BeamformerShaderKind_CudaHilbert:{ commit = cuda_hilbert; }break; + case BeamformerShaderKind_CudaHilbert:{ commit = run_cuda_hilbert; }break; case BeamformerShaderKind_Decode:{ BeamformerShaderKind decode_table[] = { [BeamformerDataKind_Int16] = BeamformerShaderKind_Decode, @@ -575,7 +575,7 @@ 
beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, texture_type = GL_SHORT; texture_format = GL_RED_INTEGER; /* TODO(rnp): cuda lib */ - ctx->compute_context.cuda_lib.set_channel_mapping(pb->channel_mapping); + cuda_set_channel_mapping(pb->channel_mapping); }break; case BeamformerParameterBlockRegion_FocalVectors:{ texture_kind = BeamformerComputeTextureKind_FocalVectors; @@ -677,11 +677,11 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame cc->last_output_ssbo_index = !cc->last_output_ssbo_index; }break; case BeamformerShaderKind_CudaDecode:{ - cc->cuda_lib.decode(0, output_ssbo_idx, 0); + cuda_decode(0, output_ssbo_idx, 0); cc->last_output_ssbo_index = !cc->last_output_ssbo_index; }break; case BeamformerShaderKind_CudaHilbert:{ - cc->cuda_lib.hilbert(input_ssbo_idx, output_ssbo_idx); + cuda_hilbert(input_ssbo_idx, output_ssbo_idx); cc->last_output_ssbo_index = !cc->last_output_ssbo_index; }break; case BeamformerShaderKind_Demodulate: diff --git a/beamformer.h b/beamformer.h @@ -52,19 +52,16 @@ CUDA_HILBERT_FN(cuda_hilbert_stub) {} typedef CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_fn); CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_stub) {} -#define CUDA_LIB_FNS \ +#define CUDALibraryProcedureList \ X(decode, "cuda_decode") \ X(hilbert, "cuda_hilbert") \ X(init, "init_cuda_configuration") \ X(register_buffers, "register_cuda_buffers") \ X(set_channel_mapping, "cuda_set_channel_mapping") -typedef struct { - void *lib; - #define X(name, symname) cuda_ ## name ## _fn *name; - CUDA_LIB_FNS - #undef X -} CudaLib; +#define X(name, ...) 
DEBUG_IMPORT cuda_## name ##_fn *cuda_## name; +CUDALibraryProcedureList +#undef X /* TODO(rnp): this should be a UBO */ #define FRAME_VIEW_MODEL_MATRIX_LOC 0 @@ -225,7 +222,6 @@ typedef struct { u32 shader_timer_ids[BeamformerMaxComputeShaderStages]; BeamformerRenderModel unit_cube_model; - CudaLib cuda_lib; } BeamformerComputeContext; typedef enum { diff --git a/build.c b/build.c @@ -874,10 +874,7 @@ main(i32 argc, char *argv[]) cmd_append(&arena, &c, OS_MAIN, OUTPUT_EXE("ogl")); cmd_pdb(&arena, &c, "ogl"); if (options.debug) { - /* NOTE(rnp): (gnu) ld doesn't properly export global symbols without this */ - if (is_gcc) cmd_append(&arena, &c, "-Wl,--export-dynamic"); - - if (!is_w32) cmd_append(&arena, &c, "-Wl,-rpath,."); + if (!is_w32) cmd_append(&arena, &c, "-Wl,--export-dynamic", "-Wl,-rpath,."); if (!is_msvc) cmd_append(&arena, &c, "-L."); cmd_append(&arena, &c, LINK_LIB("raylib")); } else { diff --git a/static.c b/static.c @@ -185,25 +185,26 @@ function FILE_WATCH_CALLBACK_FN(reload_shader_indirect) return 1; } -function FILE_WATCH_CALLBACK_FN(load_cuda_lib) +function FILE_WATCH_CALLBACK_FN(load_cuda_library) { - CudaLib *cl = (CudaLib *)user_data; - b32 result = os_file_exists((c8 *)path.data); + local_persist void *cuda_library_handle; + + b32 result = os_file_exists((c8 *)path.data); if (result) { Stream err = arena_stream(arena); - stream_append_s8(&err, s8("loading CUDA lib: " OS_CUDA_LIB_NAME "\n")); - os_unload_library(cl->lib); - cl->lib = os_load_library((c8 *)path.data, OS_CUDA_LIB_TEMP_NAME, &err); - #define X(name, symname) cl->name = os_lookup_dynamic_symbol(cl->lib, symname, &err); - CUDA_LIB_FNS + stream_append_s8(&err, s8("loading CUDA library: " OS_CUDA_LIB_NAME "\n")); + os_unload_library(cuda_library_handle); + cuda_library_handle = os_load_library((c8 *)path.data, OS_CUDA_LIB_TEMP_NAME, &err); + #define X(name, symname) cuda_## name = os_lookup_dynamic_symbol(cuda_library_handle, symname, &err); + CUDALibraryProcedureList #undef X 
os_write_file(os->error_handle, stream_to_s8(&err)); } - #define X(name, symname) if (!cl->name) cl->name = cuda_ ## name ## _stub; - CUDA_LIB_FNS + #define X(name, symname) if (!cuda_## name) cuda_## name = cuda_ ## name ## _stub; + CUDALibraryProcedureList #undef X return result; @@ -386,14 +387,13 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input glfwMakeContextCurrent(raylib_window_handle); + #define X(name, ...) cuda_## name = cuda_## name ##_stub; + CUDALibraryProcedureList + #undef X if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA - && load_cuda_lib(&ctx->os, s8(OS_CUDA_LIB_NAME), (iptr)&cs->cuda_lib, *memory)) + && load_cuda_library(&ctx->os, s8(OS_CUDA_LIB_NAME), 0, *memory)) { - os_add_file_watch(&ctx->os, memory, s8(OS_CUDA_LIB_NAME), load_cuda_lib, (iptr)&cs->cuda_lib); - } else { - #define X(name, symname) if (!cs->cuda_lib.name) cs->cuda_lib.name = cuda_ ## name ## _stub; - CUDA_LIB_FNS - #undef X + os_add_file_watch(&ctx->os, memory, s8(OS_CUDA_LIB_NAME), load_cuda_library, 0); } /* NOTE: set up OpenGL debug logging */