Commit: 80bfc2a08168a5e12ff2ce852ba74540f0a5460c
Parent: de13041fc7b016d02ca216295a19a10c931a9c82
Author: Randy Palamar
Date: Wed, 4 Sep 2024 18:31:45 -0600
cleanup library loading
It should now be possible to hot reload the cuda lib and
(untested) hot reload the main application on win32.
Diffstat:
M | beamformer.c | | | 12 | ++++++------ |
M | beamformer.h | | | 58 | ++++++++++++++++++++++++++++++++++++++++++---------------- |
M | build.sh | | | 6 | +++++- |
M | main.c | | | 65 | +++++++++++++++++++++++++++++++---------------------------------- |
M | os_unix.c | | | 57 | +++++++++++++++++++++++++++++++++++++++++++++++++++++---- |
M | os_win32.c | | | 22 | ++++++++++++++++++---- |
M | util.h | | | 2 | -- |
7 files changed, 155 insertions(+), 67 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -77,10 +77,10 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
break;
case GL_VENDOR_NVIDIA:
cs->raw_data_arena = os_alloc_arena(cs->raw_data_arena, full_rf_buf_size);
- register_cuda_buffers(cs->rf_data_ssbos, ARRAY_COUNT(cs->rf_data_ssbos),
- cs->raw_data_ssbo);
- init_cuda_configuration(bp->rf_raw_dim.E, bp->dec_data_dim.E, bp->channel_mapping,
- bp->channel_offset > 0);
+ ctx->cuda_lib.register_cuda_buffers(cs->rf_data_ssbos, ARRAY_COUNT(cs->rf_data_ssbos),
+ cs->raw_data_ssbo);
+ ctx->cuda_lib.init_cuda_configuration(bp->rf_raw_dim.E, bp->dec_data_dim.E,
+ bp->channel_mapping, bp->channel_offset > 0);
break;
}
@@ -125,12 +125,12 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
break;
case CS_CUDA_DECODE:
- cuda_decode(csctx->raw_data_index * rf_raw_size, output_ssbo_idx);
+ ctx->cuda_lib.cuda_decode(csctx->raw_data_index * rf_raw_size, output_ssbo_idx);
csctx->raw_data_fences[csctx->raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
break;
case CS_CUDA_HILBERT:
- cuda_hilbert(input_ssbo_idx, output_ssbo_idx);
+ ctx->cuda_lib.cuda_hilbert(input_ssbo_idx, output_ssbo_idx);
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
break;
case CS_DEMOD:
diff --git a/beamformer.h b/beamformer.h
@@ -83,11 +83,33 @@ typedef struct {
#if defined(__unix__)
#include "os_unix.c"
+ #ifdef _DEBUG
+ #define DEBUG_EXPORT
+ #define OS_DEBUG_LIB_NAME "./beamformer.so"
+ #define OS_DEBUG_LIB_TEMP_NAME "./beamformer_temp.so"
+ #else
+ #define DEBUG_EXPORT static
+ #endif
+
+ #define OS_CUDA_LIB_NAME "./extern/cuda_toolkit.so"
+ #define OS_CUDA_LIB_TEMP_NAME "./extern/cuda_toolkit_temp.so"
+
#define OS_PIPE_NAME "/tmp/beamformer_data_fifo"
#define OS_SMEM_NAME "/ogl_beamformer_parameters"
#elif defined(_WIN32)
#include "os_win32.c"
+ #ifdef _DEBUG
+ #define DEBUG_EXPORT __declspec(dllexport)
+ #define OS_DEBUG_LIB_NAME "beamformer.dll"
+ #define OS_DEBUG_LIB_TEMP_NAME "beamformer_temp.dll"
+ #else
+ #define DEBUG_EXPORT static
+ #endif
+
+ #define OS_CUDA_LIB_NAME "extern/cuda_toolkit.dll"
+ #define OS_CUDA_LIB_TEMP_NAME "extern/cuda_toolkit_temp.dll"
+
#define OS_PIPE_NAME "\\\\.\\pipe\\beamformer_data_fifo"
#define OS_SMEM_NAME "Local\\ogl_beamformer_parameters"
#else
@@ -96,6 +118,24 @@ typedef struct {
#define MAX_FRAMES_IN_FLIGHT 3
+#define INIT_CUDA_CONFIGURATION_FN(name) void name(u32 *input_dims, u32 *decoded_dims, u32 *channel_mapping, b32 rx_cols)
+typedef INIT_CUDA_CONFIGURATION_FN(init_cuda_configuration_fn); /* function type of CudaLib.init_cuda_configuration */
+#define REGISTER_CUDA_BUFFERS_FN(name) void name(u32 *rf_data_ssbos, u32 rf_buffer_count, u32 raw_data_ssbo)
+typedef REGISTER_CUDA_BUFFERS_FN(register_cuda_buffers_fn); /* function type of CudaLib.register_cuda_buffers */
+#define CUDA_DECODE_FN(name) void name(size_t input_offset, u32 output_buffer_idx)
+typedef CUDA_DECODE_FN(cuda_decode_fn); /* function type of CudaLib.cuda_decode */
+#define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx)
+typedef CUDA_HILBERT_FN(cuda_hilbert_fn); /* function type of CudaLib.cuda_hilbert */
+
+typedef struct { /* state of the dynamically loaded CUDA library: handle, file timestamp, entry points */
+ os_library_handle lib; /* handle returned by os_load_library(); handed to os_unload_library() before a reload */
+ os_filetime timestamp; /* timestamp of the library file recorded when it was last (re)loaded */
+ init_cuda_configuration_fn *init_cuda_configuration; /* entry points below are looked up by name; */
+ register_cuda_buffers_fn *register_cuda_buffers; /* check_and_load_cuda_lib() substitutes the matching */
+ cuda_decode_fn *cuda_decode; /* *_stub function whenever a lookup returns NULL */
+ cuda_hilbert_fn *cuda_hilbert;
+} CudaLib;
+
typedef struct {
u32 programs[CS_LAST];
@@ -164,23 +204,9 @@ typedef struct {
os_pipe data_pipe;
u32 partial_transfer_count;
+ CudaLib cuda_lib;
+
BeamformerParametersFull *params;
} BeamformerCtx;
-#define CUDA_LIB_NAME "cuda_toolkit.dll"
-
-#define INIT_CUDA_CONFIGURATION_FN(name) void name(u32 *input_dims, u32 *decoded_dims, u32 *channel_mapping, b32 rx_cols)
-typedef INIT_CUDA_CONFIGURATION_FN(init_cuda_configuration_fn);
-#define REGISTER_CUDA_BUFFERS_FN(name) void name(u32 *rf_data_ssbos, u32 rf_buffer_count, u32 raw_data_ssbo)
-typedef REGISTER_CUDA_BUFFERS_FN(register_cuda_buffers_fn);
-#define CUDA_DECODE_FN(name) void name(size_t input_offset, u32 output_buffer_idx)
-typedef CUDA_DECODE_FN(cuda_decode_fn);
-#define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx)
-typedef CUDA_HILBERT_FN(cuda_hilbert_fn);
-
-static init_cuda_configuration_fn *init_cuda_configuration;
-static register_cuda_buffers_fn *register_cuda_buffers;
-static cuda_decode_fn *cuda_decode;
-static cuda_hilbert_fn *cuda_hilbert;
-
#endif /*_BEAMFORMER_H_ */
diff --git a/build.sh b/build.sh
@@ -1,5 +1,7 @@
#!/bin/sh
cflags="-march=native -std=c11 -O3 -Wall -I./external/include"
+#cflags="${cflags} -fproc-stat-report"
+#cflags="${cflags} -Rpass-missed=.*"
libcflags="$cflags -fPIC -shared"
ldflags="-lraylib -lm"
@@ -12,11 +14,13 @@ case $(uname -s) in
MINGW64*)
os="win32"
ldflags="$ldflags -lgdi32 -lwinmm"
+ libname="beamformer.dll"
;;
Linux*)
os="unix"
cflags="$cflags -D_DEFAULT_SOURCE"
libcflags="$libcflags -I/opt/matlab/extern/include"
+ libname="beamformer.so"
;;
esac
@@ -59,7 +63,7 @@ if [ "$debug" ]; then
libcflags="$cflags -fPIC"
libldflags="$ldflags -shared"
- ${cc} $libcflags beamformer.c -o beamformer.so $libldflags
+ ${cc} $libcflags beamformer.c -o $libname $libldflags
fi
${cc} $cflags -o ogl main.c $ldflags
diff --git a/main.c b/main.c
@@ -1,8 +1,6 @@
/* See LICENSE for license details. */
#include "beamformer.h"
-static os_library_handle g_cuda_lib_handle;
-
static char *compute_shader_paths[CS_LAST] = {
[CS_HADAMARD] = "shaders/hadamard.glsl",
[CS_HERCULES] = "shaders/2d_hercules.glsl",
@@ -17,9 +15,6 @@ static char *compute_shader_paths[CS_LAST] = {
static void do_debug(void) { }
#else
-#include <time.h>
-
-static char *libname = "./beamformer.so";
static os_library_handle libhandle;
typedef void do_beamformer_fn(BeamformerCtx *, Arena);
@@ -32,24 +27,16 @@ get_filetime(char *name)
return fstats.timestamp;
}
-static b32
-filetime_is_newer(struct timespec a, struct timespec b)
-{
- return (a.tv_sec - b.tv_sec) + (a.tv_nsec - b.tv_nsec);
-}
-
static void
do_debug(void)
{
static os_filetime updated_time;
- os_filetime test_time = get_filetime(libname);
- if (filetime_is_newer(test_time, updated_time)) {
- struct timespec sleep_time = {.tv_sec = 0, .tv_nsec = 100e6};
- nanosleep(&sleep_time, &sleep_time);
- os_close_library(libhandle);
- libhandle = os_load_library(libname);
+ os_filetime test_time = get_filetime(OS_DEBUG_LIB_NAME); /* current timestamp of the debug library file */
+ if (os_filetime_is_newer(test_time, updated_time)) { /* reload only when the file is newer than the one last loaded */
+ os_unload_library(libhandle); /* drop the previous build before loading the new one */
+ libhandle = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME); /* os_load_library() first tries to load a copy at the temp name */
do_beamformer = os_lookup_dynamic_symbol(libhandle, "do_beamformer");
- updated_time = test_time;
+ updated_time = test_time; /* remember the timestamp of the build that is now loaded */
}
}
@@ -150,6 +137,30 @@ reload_shaders(BeamformerCtx *ctx, Arena a)
}
}
+/* (Re)load the CUDA library when the file at OS_CUDA_LIB_NAME is newer than the
+ * timestamp stored in cl. Every entry point that cannot be resolved is replaced
+ * by its *_stub counterpart so the function pointers in cl are never NULL
+ * after a load attempt. */
+static void
+check_and_load_cuda_lib(CudaLib *cl)
+{
+	os_file_stats lib_stats = os_get_file_stats(OS_CUDA_LIB_NAME);
+	if (os_filetime_is_newer(lib_stats.timestamp, cl->timestamp)) {
+		TraceLog(LOG_INFO, "Loading CUDA lib: %s", OS_CUDA_LIB_NAME);
+
+		/* NOTE: the timestamp is stored before loading, so a failed load is
+		 * not retried until the file changes again */
+		cl->timestamp = lib_stats.timestamp;
+		os_unload_library(cl->lib);
+		cl->lib = os_load_library(OS_CUDA_LIB_NAME, OS_CUDA_LIB_TEMP_NAME);
+
+		cl->init_cuda_configuration = os_lookup_dynamic_symbol(cl->lib, "init_cuda_configuration");
+		if (!cl->init_cuda_configuration)
+			cl->init_cuda_configuration = init_cuda_configuration_stub;
+
+		cl->register_cuda_buffers = os_lookup_dynamic_symbol(cl->lib, "register_cuda_buffers");
+		if (!cl->register_cuda_buffers)
+			cl->register_cuda_buffers = register_cuda_buffers_stub;
+
+		cl->cuda_decode = os_lookup_dynamic_symbol(cl->lib, "cuda_decode");
+		if (!cl->cuda_decode)
+			cl->cuda_decode = cuda_decode_stub;
+
+		cl->cuda_hilbert = os_lookup_dynamic_symbol(cl->lib, "cuda_hilbert");
+		if (!cl->cuda_hilbert)
+			cl->cuda_hilbert = cuda_hilbert_stub;
+	}
+}
+
int
main(void)
{
@@ -200,22 +211,6 @@ main(void)
}
}
- switch (ctx.gl_vendor_id) {
- case GL_VENDOR_AMD:
- case GL_VENDOR_INTEL:
- break;
- case GL_VENDOR_NVIDIA:
- g_cuda_lib_handle = os_load_library(CUDA_LIB_NAME);
- #define LOOKUP_CUDA_FN(f) \
- f = os_lookup_dynamic_symbol(g_cuda_lib_handle, #f); \
- if (!f) f = f##_stub
- LOOKUP_CUDA_FN(init_cuda_configuration);
- LOOKUP_CUDA_FN(register_cuda_buffers);
- LOOKUP_CUDA_FN(cuda_decode);
- LOOKUP_CUDA_FN(cuda_hilbert);
- break;
- }
-
/* NOTE: set up OpenGL debug logging */
glDebugMessageCallback(gl_debug_logger, NULL);
#ifdef _DEBUG
@@ -236,6 +231,8 @@ main(void)
while(!WindowShouldClose()) {
do_debug();
+ if (ctx.gl_vendor_id == GL_VENDOR_NVIDIA)
+ check_and_load_cuda_lib(&ctx.cuda_lib);
if (ctx.flags & RELOAD_SHADERS) {
ctx.flags &= ~RELOAD_SHADERS;
diff --git a/os_unix.c b/os_unix.c
@@ -138,12 +138,50 @@ os_remove_shared_memory(char *name)
shm_unlink(name);
}
+/* NOTE: complete garbage because there is no standardized copyfile() in POSIX.
+ * Copies the contents of `name` into `new`; `new` is created when it does not
+ * exist and truncated otherwise. Returns 1 once st_size bytes of `name` have
+ * been written, 0 on any failure (a partially written `new` may remain). */
+static b32
+os_copy_file(char *name, char *new)
+{
+	b32 result = 0;
+	struct stat sb;
+	if (stat(name, &sb) < 0)
+		return 0;
+
+	i32 fd_old = open(name, O_RDONLY);
+	/* NOTE: O_CREAT is required: os_load_library() unlinks the temp file after
+	 * every load so the destination normally does not exist. Only the
+	 * permission bits of st_mode are meaningful as a creation mode. */
+	i32 fd_new = open(new, O_WRONLY|O_CREAT|O_TRUNC, sb.st_mode & 0777);
+
+	if (fd_old < 0 || fd_new < 0)
+		goto ret;
+
+	u8 buf[4096];
+	size copied = 0;
+	while (copied < sb.st_size) {
+		size r = read(fd_old, buf, ARRAY_COUNT(buf));
+		/* NOTE: r == 0 is an early EOF (file shrank); bail out instead of spinning */
+		if (r <= 0) goto ret;
+
+		/* NOTE: write() may be short; keep going until the whole chunk is out */
+		size written = 0;
+		while (written < r) {
+			size w = write(fd_new, buf + written, r - written);
+			if (w <= 0) goto ret;
+			written += w;
+		}
+		copied += written;
+	}
+	result = 1;
+ret:
+	if (fd_old != -1) close(fd_old);
+	if (fd_new != -1) close(fd_new);
+	return result;
+}
+
static os_library_handle
-os_load_library(char *name)
+os_load_library(char *name, char *temp_name) /* temp_name may be NULL, in which case name is loaded directly */
{
+ if (temp_name) { /* presumably so the original file can be rebuilt while the copy is loaded — confirm */
+ if (os_copy_file(name, temp_name)) /* when the copy fails name is left unchanged and the original is loaded */
+ name = temp_name;
+ }
os_library_handle res = dlopen(name, RTLD_NOW|RTLD_LOCAL);
if (!res)
TraceLog(LOG_WARNING, "os_load_library(%s): %s\n", name, dlerror());
+
+ if (temp_name)
+ unlink(temp_name); /* the temp copy is removed whether or not the copy or the load succeeded */
+
return res;
}
@@ -158,12 +196,23 @@ os_lookup_dynamic_symbol(os_library_handle h, char *name)
return res;
}
-#ifdef _DEBUG
static void
-os_close_library(os_library_handle h)
+os_unload_library(os_library_handle h) /* does nothing when h is NULL */
{
/* NOTE: glibc is buggy gnuware so we need to check this */
if (h)
dlclose(h);
}
-#endif /* _DEBUG */
+
+/* Returns non-zero only when timestamp a is strictly later than timestamp b.
+ * Both values are expected to be normalized (0 <= tv_nsec < 1000000000). */
+static b32
+os_filetime_is_newer(os_filetime a, os_filetime b)
+{
+	/* NOTE: seconds decide; nanoseconds only break a tie. Adding the two
+	 * differences together mixes units and reports an older file as newer
+	 * whenever the nanosecond remainder outweighs a negative second count,
+	 * e.g. a = {10s, 5ns}, b = {11s, 0ns}. */
+	b32 result;
+	if (a.tv_sec != b.tv_sec)
+		result = a.tv_sec > b.tv_sec;
+	else
+		result = a.tv_nsec > b.tv_nsec;
+	return result;
+}
diff --git a/os_win32.c b/os_win32.c
@@ -145,11 +145,20 @@ os_remove_shared_memory(char *name)
}
static os_library_handle
-os_load_library(char *name)
+os_load_library(char *name, char *temp_name) /* temp_name may be NULL, in which case name is loaded directly */
{
+ if (temp_name) {
+ if (CopyFile(name, temp_name, 0)) /* third argument 0: an existing temp file is overwritten */
+ name = temp_name; /* when the copy fails the original path is loaded instead */
+ }
+
os_library_handle res = LoadLibraryA(name);
if (!res)
TraceLog(LOG_WARNING, "os_load_library(%s): %d\n", name, GetLastError());
+
+ if (temp_name)
+ DeleteFileA(temp_name); /* NOTE(review): result is ignored; presumably fails while the copy is still loaded — confirm */
+
return res;
}
@@ -164,10 +173,15 @@ os_lookup_dynamic_symbol(os_library_handle h, char *name)
return res;
}
-#ifdef _DEBUG
static void
-os_close_library(os_library_handle h)
+os_unload_library(os_library_handle h) /* NOTE(review): h reaches FreeLibrary() unchecked, unlike the unix version — confirm a NULL handle is acceptable */
{
FreeLibrary(h);
}
-#endif /* _DEBUG */
+
+/* Returns non-zero only when CompareFileTime() reports a as later than b. */
+static b32
+os_filetime_is_newer(os_filetime a, os_filetime b)
+{
+	return CompareFileTime(&a, &b) > 0;
+}
diff --git a/util.h b/util.h
@@ -11,10 +11,8 @@
#ifdef _DEBUG
#define ASSERT(c) do { if (!(c)) asm("int3; nop"); } while (0);
- #define DEBUG_EXPORT
#else
#define ASSERT(c)
- #define DEBUG_EXPORT static
#endif
#define static_assert _Static_assert