Commit: eced14ae419e51674f65e0346afb8babeea31d70
Parent: 2d3df15f1c6f5e3f2ecbf61de94bfc4718d1c4ac
Author: Randy Palamar
Date: Sat, 9 Aug 2025 07:59:01 -0600
core: make cuda library function pointers into globals
This is equivalent to how OpenGL function pointers are accessed. It is
more efficient (RIP-relative addressing) and removes useless
context storage.
Diffstat:
4 files changed, 32 insertions(+), 39 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -254,8 +254,8 @@ alloc_shader_storage(BeamformerCtx *ctx, u32 decoded_data_size, Arena arena)
* need to do. For now grab out of parameter block 0 but it is not correct */
BeamformerParameterBlock *pb = beamformer_parameter_block(ctx->shared_memory.region, 0);
/* NOTE(rnp): these are stubs when CUDA isn't supported */
- cc->cuda_lib.register_buffers(cc->ping_pong_ssbos, countof(cc->ping_pong_ssbos), cc->rf_buffer.ssbo);
- cc->cuda_lib.init(pb->parameters.rf_raw_dim, pb->parameters.dec_data_dim);
+ cuda_register_buffers(cc->ping_pong_ssbos, countof(cc->ping_pong_ssbos), cc->rf_buffer.ssbo);
+ cuda_init(pb->parameters.rf_raw_dim, pb->parameters.dec_data_dim);
}
function void
@@ -376,18 +376,18 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
BeamformerParameters *bp = &cp->das_ubo_data;
b32 decode_first = pb->pipeline.shaders[0] == BeamformerShaderKind_Decode;
- b32 cuda_hilbert = 0;
- b32 demodulate = 0;
+ b32 run_cuda_hilbert = 0;
+ b32 demodulate = 0;
for (u32 i = 0; i < pb->pipeline.shader_count; i++) {
switch (pb->pipeline.shaders[i]) {
- case BeamformerShaderKind_CudaHilbert:{ cuda_hilbert = 1; }break;
- case BeamformerShaderKind_Demodulate:{ demodulate = 1; }break;
+ case BeamformerShaderKind_CudaHilbert:{ run_cuda_hilbert = 1; }break;
+ case BeamformerShaderKind_Demodulate:{ demodulate = 1; }break;
default:{}break;
}
}
- if (demodulate) cuda_hilbert = 0;
+ if (demodulate) run_cuda_hilbert = 0;
mem_copy(bp, &pb->parameters, sizeof(*bp));
@@ -399,7 +399,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
b32 commit = 0;
switch (shader) {
- case BeamformerShaderKind_CudaHilbert:{ commit = cuda_hilbert; }break;
+ case BeamformerShaderKind_CudaHilbert:{ commit = run_cuda_hilbert; }break;
case BeamformerShaderKind_Decode:{
BeamformerShaderKind decode_table[] = {
[BeamformerDataKind_Int16] = BeamformerShaderKind_Decode,
@@ -575,7 +575,7 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp,
texture_type = GL_SHORT;
texture_format = GL_RED_INTEGER;
/* TODO(rnp): cuda lib */
- ctx->compute_context.cuda_lib.set_channel_mapping(pb->channel_mapping);
+ cuda_set_channel_mapping(pb->channel_mapping);
}break;
case BeamformerParameterBlockRegion_FocalVectors:{
texture_kind = BeamformerComputeTextureKind_FocalVectors;
@@ -677,11 +677,11 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
cc->last_output_ssbo_index = !cc->last_output_ssbo_index;
}break;
case BeamformerShaderKind_CudaDecode:{
- cc->cuda_lib.decode(0, output_ssbo_idx, 0);
+ cuda_decode(0, output_ssbo_idx, 0);
cc->last_output_ssbo_index = !cc->last_output_ssbo_index;
}break;
case BeamformerShaderKind_CudaHilbert:{
- cc->cuda_lib.hilbert(input_ssbo_idx, output_ssbo_idx);
+ cuda_hilbert(input_ssbo_idx, output_ssbo_idx);
cc->last_output_ssbo_index = !cc->last_output_ssbo_index;
}break;
case BeamformerShaderKind_Demodulate:
diff --git a/beamformer.h b/beamformer.h
@@ -52,19 +52,16 @@ CUDA_HILBERT_FN(cuda_hilbert_stub) {}
typedef CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_fn);
CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_stub) {}
-#define CUDA_LIB_FNS \
+#define CUDALibraryProcedureList \
X(decode, "cuda_decode") \
X(hilbert, "cuda_hilbert") \
X(init, "init_cuda_configuration") \
X(register_buffers, "register_cuda_buffers") \
X(set_channel_mapping, "cuda_set_channel_mapping")
-typedef struct {
- void *lib;
- #define X(name, symname) cuda_ ## name ## _fn *name;
- CUDA_LIB_FNS
- #undef X
-} CudaLib;
+#define X(name, ...) DEBUG_IMPORT cuda_## name ##_fn *cuda_## name;
+CUDALibraryProcedureList
+#undef X
/* TODO(rnp): this should be a UBO */
#define FRAME_VIEW_MODEL_MATRIX_LOC 0
@@ -225,7 +222,6 @@ typedef struct {
u32 shader_timer_ids[BeamformerMaxComputeShaderStages];
BeamformerRenderModel unit_cube_model;
- CudaLib cuda_lib;
} BeamformerComputeContext;
typedef enum {
diff --git a/build.c b/build.c
@@ -874,10 +874,7 @@ main(i32 argc, char *argv[])
cmd_append(&arena, &c, OS_MAIN, OUTPUT_EXE("ogl"));
cmd_pdb(&arena, &c, "ogl");
if (options.debug) {
- /* NOTE(rnp): (gnu) ld doesn't properly export global symbols without this */
- if (is_gcc) cmd_append(&arena, &c, "-Wl,--export-dynamic");
-
- if (!is_w32) cmd_append(&arena, &c, "-Wl,-rpath,.");
+ if (!is_w32) cmd_append(&arena, &c, "-Wl,--export-dynamic", "-Wl,-rpath,.");
if (!is_msvc) cmd_append(&arena, &c, "-L.");
cmd_append(&arena, &c, LINK_LIB("raylib"));
} else {
diff --git a/static.c b/static.c
@@ -185,25 +185,26 @@ function FILE_WATCH_CALLBACK_FN(reload_shader_indirect)
return 1;
}
-function FILE_WATCH_CALLBACK_FN(load_cuda_lib)
+function FILE_WATCH_CALLBACK_FN(load_cuda_library)
{
- CudaLib *cl = (CudaLib *)user_data;
- b32 result = os_file_exists((c8 *)path.data);
+ local_persist void *cuda_library_handle;
+
+ b32 result = os_file_exists((c8 *)path.data);
if (result) {
Stream err = arena_stream(arena);
- stream_append_s8(&err, s8("loading CUDA lib: " OS_CUDA_LIB_NAME "\n"));
- os_unload_library(cl->lib);
- cl->lib = os_load_library((c8 *)path.data, OS_CUDA_LIB_TEMP_NAME, &err);
- #define X(name, symname) cl->name = os_lookup_dynamic_symbol(cl->lib, symname, &err);
- CUDA_LIB_FNS
+ stream_append_s8(&err, s8("loading CUDA library: " OS_CUDA_LIB_NAME "\n"));
+ os_unload_library(cuda_library_handle);
+ cuda_library_handle = os_load_library((c8 *)path.data, OS_CUDA_LIB_TEMP_NAME, &err);
+ #define X(name, symname) cuda_## name = os_lookup_dynamic_symbol(cuda_library_handle, symname, &err);
+ CUDALibraryProcedureList
#undef X
os_write_file(os->error_handle, stream_to_s8(&err));
}
- #define X(name, symname) if (!cl->name) cl->name = cuda_ ## name ## _stub;
- CUDA_LIB_FNS
+ #define X(name, symname) if (!cuda_## name) cuda_## name = cuda_ ## name ## _stub;
+ CUDALibraryProcedureList
#undef X
return result;
@@ -386,14 +387,13 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
glfwMakeContextCurrent(raylib_window_handle);
+ #define X(name, ...) cuda_## name = cuda_## name ##_stub;
+ CUDALibraryProcedureList
+ #undef X
if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA
- && load_cuda_lib(&ctx->os, s8(OS_CUDA_LIB_NAME), (iptr)&cs->cuda_lib, *memory))
+ && load_cuda_library(&ctx->os, s8(OS_CUDA_LIB_NAME), 0, *memory))
{
- os_add_file_watch(&ctx->os, memory, s8(OS_CUDA_LIB_NAME), load_cuda_lib, (iptr)&cs->cuda_lib);
- } else {
- #define X(name, symname) if (!cs->cuda_lib.name) cs->cuda_lib.name = cuda_ ## name ## _stub;
- CUDA_LIB_FNS
- #undef X
+ os_add_file_watch(&ctx->os, memory, s8(OS_CUDA_LIB_NAME), load_cuda_library, 0);
}
/* NOTE: set up OpenGL debug logging */