ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: 80bfc2a08168a5e12ff2ce852ba74540f0a5460c
Parent: de13041fc7b016d02ca216295a19a10c931a9c82
Author: Randy Palamar
Date:   Wed,  4 Sep 2024 18:31:45 -0600

clean up library loading

It should now be possible to hot reload the CUDA library and,
on win32, to hot reload the main application (the latter is untested).

Diffstat:
Mbeamformer.c | 12++++++------
Mbeamformer.h | 58++++++++++++++++++++++++++++++++++++++++++----------------
Mbuild.sh | 6+++++-
Mmain.c | 65+++++++++++++++++++++++++++++++----------------------------------
Mos_unix.c | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++----
Mos_win32.c | 22++++++++++++++++++----
Mutil.h | 2--
7 files changed, 155 insertions(+), 67 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -77,10 +77,10 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a) break; case GL_VENDOR_NVIDIA: cs->raw_data_arena = os_alloc_arena(cs->raw_data_arena, full_rf_buf_size); - register_cuda_buffers(cs->rf_data_ssbos, ARRAY_COUNT(cs->rf_data_ssbos), - cs->raw_data_ssbo); - init_cuda_configuration(bp->rf_raw_dim.E, bp->dec_data_dim.E, bp->channel_mapping, - bp->channel_offset > 0); + ctx->cuda_lib.register_cuda_buffers(cs->rf_data_ssbos, ARRAY_COUNT(cs->rf_data_ssbos), + cs->raw_data_ssbo); + ctx->cuda_lib.init_cuda_configuration(bp->rf_raw_dim.E, bp->dec_data_dim.E, + bp->channel_mapping, bp->channel_offset > 0); break; } @@ -125,12 +125,12 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index; break; case CS_CUDA_DECODE: - cuda_decode(csctx->raw_data_index * rf_raw_size, output_ssbo_idx); + ctx->cuda_lib.cuda_decode(csctx->raw_data_index * rf_raw_size, output_ssbo_idx); csctx->raw_data_fences[csctx->raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index; break; case CS_CUDA_HILBERT: - cuda_hilbert(input_ssbo_idx, output_ssbo_idx); + ctx->cuda_lib.cuda_hilbert(input_ssbo_idx, output_ssbo_idx); csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index; break; case CS_DEMOD: diff --git a/beamformer.h b/beamformer.h @@ -83,11 +83,33 @@ typedef struct { #if defined(__unix__) #include "os_unix.c" + #ifdef _DEBUG + #define DEBUG_EXPORT + #define OS_DEBUG_LIB_NAME "./beamformer.so" + #define OS_DEBUG_LIB_TEMP_NAME "./beamformer_temp.so" + #else + #define DEBUG_EXPORT static + #endif + + #define OS_CUDA_LIB_NAME "./extern/cuda_toolkit.so" + #define OS_CUDA_LIB_TEMP_NAME "./extern/cuda_toolkit_temp.so" + #define OS_PIPE_NAME "/tmp/beamformer_data_fifo" #define OS_SMEM_NAME "/ogl_beamformer_parameters" #elif defined(_WIN32) #include "os_win32.c" + #ifdef _DEBUG + #define 
DEBUG_EXPORT __declspec(dllexport) + #define OS_DEBUG_LIB_NAME "beamformer.dll" + #define OS_DEBUG_LIB_TEMP_NAME "beamformer_temp.dll" + #else + #define DEBUG_EXPORT static + #endif + + #define OS_CUDA_LIB_NAME "extern/cuda_toolkit.dll" + #define OS_CUDA_LIB_TEMP_NAME "extern/cuda_toolkit_temp.dll" + #define OS_PIPE_NAME "\\\\.\\pipe\\beamformer_data_fifo" #define OS_SMEM_NAME "Local\\ogl_beamformer_parameters" #else @@ -96,6 +118,24 @@ typedef struct { #define MAX_FRAMES_IN_FLIGHT 3 +#define INIT_CUDA_CONFIGURATION_FN(name) void name(u32 *input_dims, u32 *decoded_dims, u32 *channel_mapping, b32 rx_cols) +typedef INIT_CUDA_CONFIGURATION_FN(init_cuda_configuration_fn); +#define REGISTER_CUDA_BUFFERS_FN(name) void name(u32 *rf_data_ssbos, u32 rf_buffer_count, u32 raw_data_ssbo) +typedef REGISTER_CUDA_BUFFERS_FN(register_cuda_buffers_fn); +#define CUDA_DECODE_FN(name) void name(size_t input_offset, u32 output_buffer_idx) +typedef CUDA_DECODE_FN(cuda_decode_fn); +#define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx) +typedef CUDA_HILBERT_FN(cuda_hilbert_fn); + +typedef struct { + os_library_handle lib; + os_filetime timestamp; + init_cuda_configuration_fn *init_cuda_configuration; + register_cuda_buffers_fn *register_cuda_buffers; + cuda_decode_fn *cuda_decode; + cuda_hilbert_fn *cuda_hilbert; +} CudaLib; + typedef struct { u32 programs[CS_LAST]; @@ -164,23 +204,9 @@ typedef struct { os_pipe data_pipe; u32 partial_transfer_count; + CudaLib cuda_lib; + BeamformerParametersFull *params; } BeamformerCtx; -#define CUDA_LIB_NAME "cuda_toolkit.dll" - -#define INIT_CUDA_CONFIGURATION_FN(name) void name(u32 *input_dims, u32 *decoded_dims, u32 *channel_mapping, b32 rx_cols) -typedef INIT_CUDA_CONFIGURATION_FN(init_cuda_configuration_fn); -#define REGISTER_CUDA_BUFFERS_FN(name) void name(u32 *rf_data_ssbos, u32 rf_buffer_count, u32 raw_data_ssbo) -typedef REGISTER_CUDA_BUFFERS_FN(register_cuda_buffers_fn); -#define CUDA_DECODE_FN(name) void 
name(size_t input_offset, u32 output_buffer_idx) -typedef CUDA_DECODE_FN(cuda_decode_fn); -#define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx) -typedef CUDA_HILBERT_FN(cuda_hilbert_fn); - -static init_cuda_configuration_fn *init_cuda_configuration; -static register_cuda_buffers_fn *register_cuda_buffers; -static cuda_decode_fn *cuda_decode; -static cuda_hilbert_fn *cuda_hilbert; - #endif /*_BEAMFORMER_H_ */ diff --git a/build.sh b/build.sh @@ -1,5 +1,7 @@ #!/bin/sh cflags="-march=native -std=c11 -O3 -Wall -I./external/include" +#cflags="${cflags} -fproc-stat-report" +#cflags="${cflags} -Rpass-missed=.*" libcflags="$cflags -fPIC -shared" ldflags="-lraylib -lm" @@ -12,11 +14,13 @@ case $(uname -s) in MINGW64*) os="win32" ldflags="$ldflags -lgdi32 -lwinmm" + libname="beamformer.dll" ;; Linux*) os="unix" cflags="$cflags -D_DEFAULT_SOURCE" libcflags="$libcflags -I/opt/matlab/extern/include" + libname="beamformer.so" ;; esac @@ -59,7 +63,7 @@ if [ "$debug" ]; then libcflags="$cflags -fPIC" libldflags="$ldflags -shared" - ${cc} $libcflags beamformer.c -o beamformer.so $libldflags + ${cc} $libcflags beamformer.c -o $libname $libldflags fi ${cc} $cflags -o ogl main.c $ldflags diff --git a/main.c b/main.c @@ -1,8 +1,6 @@ /* See LICENSE for license details. 
*/ #include "beamformer.h" -static os_library_handle g_cuda_lib_handle; - static char *compute_shader_paths[CS_LAST] = { [CS_HADAMARD] = "shaders/hadamard.glsl", [CS_HERCULES] = "shaders/2d_hercules.glsl", @@ -17,9 +15,6 @@ static char *compute_shader_paths[CS_LAST] = { static void do_debug(void) { } #else -#include <time.h> - -static char *libname = "./beamformer.so"; static os_library_handle libhandle; typedef void do_beamformer_fn(BeamformerCtx *, Arena); @@ -32,24 +27,16 @@ get_filetime(char *name) return fstats.timestamp; } -static b32 -filetime_is_newer(struct timespec a, struct timespec b) -{ - return (a.tv_sec - b.tv_sec) + (a.tv_nsec - b.tv_nsec); -} - static void do_debug(void) { static os_filetime updated_time; - os_filetime test_time = get_filetime(libname); - if (filetime_is_newer(test_time, updated_time)) { - struct timespec sleep_time = {.tv_sec = 0, .tv_nsec = 100e6}; - nanosleep(&sleep_time, &sleep_time); - os_close_library(libhandle); - libhandle = os_load_library(libname); + os_filetime test_time = get_filetime(OS_DEBUG_LIB_NAME); + if (os_filetime_is_newer(test_time, updated_time)) { + os_unload_library(libhandle); + libhandle = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME); do_beamformer = os_lookup_dynamic_symbol(libhandle, "do_beamformer"); - updated_time = test_time; + updated_time = test_time; } } @@ -150,6 +137,30 @@ reload_shaders(BeamformerCtx *ctx, Arena a) } } +static void +check_and_load_cuda_lib(CudaLib *cl) +{ + os_file_stats current = os_get_file_stats(OS_CUDA_LIB_NAME); + if (!os_filetime_is_newer(current.timestamp, cl->timestamp)) + return; + + TraceLog(LOG_INFO, "Loading CUDA lib: %s", OS_CUDA_LIB_NAME); + + cl->timestamp = current.timestamp; + os_unload_library(cl->lib); + cl->lib = os_load_library(OS_CUDA_LIB_NAME, OS_CUDA_LIB_TEMP_NAME); + + cl->init_cuda_configuration = os_lookup_dynamic_symbol(cl->lib, "init_cuda_configuration"); + cl->register_cuda_buffers = os_lookup_dynamic_symbol(cl->lib, 
"register_cuda_buffers"); + cl->cuda_decode = os_lookup_dynamic_symbol(cl->lib, "cuda_decode"); + cl->cuda_hilbert = os_lookup_dynamic_symbol(cl->lib, "cuda_hilbert"); + + if (!cl->init_cuda_configuration) cl->init_cuda_configuration = init_cuda_configuration_stub; + if (!cl->register_cuda_buffers) cl->register_cuda_buffers = register_cuda_buffers_stub; + if (!cl->cuda_decode) cl->cuda_decode = cuda_decode_stub; + if (!cl->cuda_hilbert) cl->cuda_hilbert = cuda_hilbert_stub; +} + int main(void) { @@ -200,22 +211,6 @@ main(void) } } - switch (ctx.gl_vendor_id) { - case GL_VENDOR_AMD: - case GL_VENDOR_INTEL: - break; - case GL_VENDOR_NVIDIA: - g_cuda_lib_handle = os_load_library(CUDA_LIB_NAME); - #define LOOKUP_CUDA_FN(f) \ - f = os_lookup_dynamic_symbol(g_cuda_lib_handle, #f); \ - if (!f) f = f##_stub - LOOKUP_CUDA_FN(init_cuda_configuration); - LOOKUP_CUDA_FN(register_cuda_buffers); - LOOKUP_CUDA_FN(cuda_decode); - LOOKUP_CUDA_FN(cuda_hilbert); - break; - } - /* NOTE: set up OpenGL debug logging */ glDebugMessageCallback(gl_debug_logger, NULL); #ifdef _DEBUG @@ -236,6 +231,8 @@ main(void) while(!WindowShouldClose()) { do_debug(); + if (ctx.gl_vendor_id == GL_VENDOR_NVIDIA) + check_and_load_cuda_lib(&ctx.cuda_lib); if (ctx.flags & RELOAD_SHADERS) { ctx.flags &= ~RELOAD_SHADERS; diff --git a/os_unix.c b/os_unix.c @@ -138,12 +138,50 @@ os_remove_shared_memory(char *name) shm_unlink(name); } +/* NOTE: complete garbage because there is no standarized copyfile() in POSix */ +static b32 +os_copy_file(char *name, char *new) +{ + b32 result = 0; + struct stat sb; + if (stat(name, &sb) < 0) + return 0; + + i32 fd_old = open(name, O_RDONLY); + i32 fd_new = open(new, O_WRONLY|O_TRUNC, sb.st_mode); + + if (fd_old < 0 || fd_new < 0) + goto ret; + u8 buf[4096]; + size copied = 0; + while (copied != sb.st_size) { + size r = read(fd_old, buf, ARRAY_COUNT(buf)); + if (r < 0) goto ret; + size w = write(fd_new, buf, r); + if (w < 0) goto ret; + copied += w; + } + result = 1; +ret: + if 
(fd_old != -1) close(fd_old); + if (fd_new != -1) close(fd_new); + return result; +} + static os_library_handle -os_load_library(char *name) +os_load_library(char *name, char *temp_name) { + if (temp_name) { + if (os_copy_file(name, temp_name)) + name = temp_name; + } os_library_handle res = dlopen(name, RTLD_NOW|RTLD_LOCAL); if (!res) TraceLog(LOG_WARNING, "os_load_library(%s): %s\n", name, dlerror()); + + if (temp_name) + unlink(temp_name); + return res; } @@ -158,12 +196,23 @@ os_lookup_dynamic_symbol(os_library_handle h, char *name) return res; } -#ifdef _DEBUG static void -os_close_library(os_library_handle h) +os_unload_library(os_library_handle h) { /* NOTE: glibc is buggy gnuware so we need to check this */ if (h) dlclose(h); } -#endif /* _DEBUG */ + +static b32 +os_filetime_is_newer(os_filetime a, os_filetime b) +{ + os_filetime result; + result.tv_sec = a.tv_sec - b.tv_sec; + result.tv_nsec = a.tv_nsec - b.tv_nsec; + if (result.tv_nsec < 0) { + result.tv_sec--; + result.tv_nsec += 1000000000L; + } + return result.tv_sec + result.tv_nsec > 0; +} diff --git a/os_win32.c b/os_win32.c @@ -145,11 +145,20 @@ os_remove_shared_memory(char *name) } static os_library_handle -os_load_library(char *name) +os_load_library(char *name, char *temp_name) { + if (temp_name) { + if (CopyFile(name, temp_name, 0)) + name = temp_name; + } + os_library_handle res = LoadLibraryA(name); if (!res) TraceLog(LOG_WARNING, "os_load_library(%s): %d\n", name, GetLastError()); + + if (temp_name) + DeleteFileA(temp_name); + return res; } @@ -164,10 +173,15 @@ os_lookup_dynamic_symbol(os_library_handle h, char *name) return res; } -#ifdef _DEBUG static void -os_close_library(os_library_handle h) +os_unload_library(os_library_handle h) { FreeLibrary(h); } -#endif /* _DEBUG */ + +static b32 +os_filetime_is_newer(os_filetime a, os_filetime b) +{ + b32 result = CompareFileTime(&a, &b) > 0; + return result; +} diff --git a/util.h b/util.h @@ -11,10 +11,8 @@ #ifdef _DEBUG #define ASSERT(c) do { 
if (!(c)) asm("int3; nop"); } while (0); - #define DEBUG_EXPORT #else #define ASSERT(c) - #define DEBUG_EXPORT static #endif #define static_assert _Static_assert