ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 6ba7f488631b2640c276ea51212688fe5192e1df
Parent: c268a6bc01452e94e82a881f1650d52ab0805dba
Author: Randy Palamar
Date:   Fri, 14 Nov 2025 05:35:04 -0700

core: make gl_parameters into a global

this is read-only after startup. no need to waste registers
passing a pointer around

Diffstat:
M beamformer.c | 21 ++++++++-------------
M beamformer.h | 30 ------------------------------
M opengl.h | 29 +++++++++++++++++++++++++++++
M static.c | 154 +++++++++++++++++++++++++++++++++++++------------------------------------------
M ui.c | 2 +-
5 files changed, 111 insertions(+), 125 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -202,21 +202,16 @@ function iv3 make_valid_output_points(i32 points[3]) { iv3 result; - result.E[0] = MAX(1, points[0]); - result.E[1] = MAX(1, points[1]); - result.E[2] = MAX(1, points[2]); + result.E[0] = CLAMP(points[0], 1, gl_parameters.max_3d_texture_dim); + result.E[1] = CLAMP(points[1], 1, gl_parameters.max_3d_texture_dim); + result.E[2] = CLAMP(points[2], 1, gl_parameters.max_3d_texture_dim); return result; } function void -alloc_beamform_frame(GLParameters *gp, BeamformerFrame *out, iv3 out_dim, GLenum gl_kind, s8 name, Arena arena) +alloc_beamform_frame(BeamformerFrame *out, iv3 out_dim, GLenum gl_kind, s8 name, Arena arena) { out->dim = make_valid_output_points(out_dim.E); - if (gp) { - out->dim.x = MIN(out->dim.x, gp->max_3d_texture_dim); - out->dim.y = MIN(out->dim.y, gp->max_3d_texture_dim); - out->dim.z = MIN(out->dim.z, gp->max_3d_texture_dim); - } /* NOTE: allocate storage for beamformed output data; * this is shared between compute and fragment shaders */ @@ -859,8 +854,8 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, GLenum gl_kind = cp->iq_pipeline ? GL_RG32F : GL_R32F; if (cp->average_frames > 1 && !beamformer_frame_compatible(ctx->averaged_frames + 0, cp->output_points, gl_kind)) { - alloc_beamform_frame(&ctx->gl, ctx->averaged_frames + 0, cp->output_points, gl_kind, s8("Averaged Frame"), arena); - alloc_beamform_frame(&ctx->gl, ctx->averaged_frames + 1, cp->output_points, gl_kind, s8("Averaged Frame"), arena); + alloc_beamform_frame(ctx->averaged_frames + 0, cp->output_points, gl_kind, s8("Averaged Frame"), arena); + alloc_beamform_frame(ctx->averaged_frames + 1, cp->output_points, gl_kind, s8("Averaged Frame"), arena); } }break; case BeamformerParameterBlockRegion_ChannelMapping:{ @@ -1236,7 +1231,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c GLenum gl_kind = cp->iq_pipeline ? 
GL_RG32F : GL_R32F; if (!beamformer_frame_compatible(frame, cp->output_points, gl_kind)) - alloc_beamform_frame(&ctx->gl, frame, cp->output_points, gl_kind, s8("Beamformed_Data"), *arena); + alloc_beamform_frame(frame, cp->output_points, gl_kind, s8("Beamformed_Data"), *arena); frame->min_coordinate = cp->min_coordinate; frame->max_coordinate = cp->max_coordinate; @@ -1428,7 +1423,7 @@ DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload) BeamformerParameters *bp = &b->parameters; BeamformerDataKind data_kind = b->pipeline.data_kind; - b32 nvidia = ctx->gl->vendor_id == GLVendor_NVIDIA; + b32 nvidia = gl_parameters.vendor_id == GLVendor_NVIDIA; rf->active_rf_size = (u32)round_up_to(rf_block_rf_size & 0xFFFFFFFFULL, 64); if (rf->size < rf->active_rf_size) diff --git a/beamformer.h b/beamformer.h @@ -90,33 +90,6 @@ typedef struct { #include "beamformer_parameters.h" #include "beamformer_shared_memory.c" -typedef enum { - GLVendor_AMD, - GLVendor_ARM, - GLVendor_Intel, - GLVendor_NVIDIA, -} GLVendorID; - -#define GL_PARAMETERS \ - X(MAJOR_VERSION, version_major, "") \ - X(MINOR_VERSION, version_minor, "") \ - X(MIN_MAP_BUFFER_ALIGNMENT, min_map_buffer_alignment, "") \ - X(TEXTURE_BUFFER_OFFSET_ALIGNMENT, texture_buffer_offset_alignment, "") \ - X(MAX_TEXTURE_BUFFER_SIZE, max_texture_buffer_size, "") \ - X(MAX_TEXTURE_SIZE, max_2d_texture_dim, "") \ - X(MAX_3D_TEXTURE_SIZE, max_3d_texture_dim, "") \ - X(MAX_SHADER_STORAGE_BLOCK_SIZE, max_ssbo_size, "") \ - X(MAX_COMPUTE_SHARED_MEMORY_SIZE, max_shared_memory_size, "") \ - X(MAX_UNIFORM_BLOCK_SIZE, max_ubo_size, "") \ - X(MAX_SERVER_WAIT_TIMEOUT, max_server_wait_time, " [ns]") \ - -typedef struct { - GLVendorID vendor_id; - #define X(glname, name, suffix) i32 name; - GL_PARAMETERS - #undef X -} GLParameters; - typedef struct { iptr elements_offset; i32 elements; @@ -290,7 +263,6 @@ typedef struct { SharedMemoryRegion *shared_memory; ComputeTimingTable *compute_timing_table; i32 *compute_worker_sync; - GLParameters 
*gl; } BeamformerUploadThreadContext; struct BeamformerFrame { @@ -317,8 +289,6 @@ struct BeamformerFrame { }; typedef struct { - GLParameters gl; - iv2 window_size; b32 should_exit; diff --git a/opengl.h b/opengl.h @@ -160,4 +160,33 @@ OGLProcedureList OGLProcedureList #undef X +typedef enum { + GLVendor_AMD, + GLVendor_ARM, + GLVendor_Intel, + GLVendor_NVIDIA, +} GLVendorID; + +#define GL_PARAMETERS \ + X(MAJOR_VERSION, version_major, "") \ + X(MINOR_VERSION, version_minor, "") \ + X(MIN_MAP_BUFFER_ALIGNMENT, min_map_buffer_alignment, "") \ + X(TEXTURE_BUFFER_OFFSET_ALIGNMENT, texture_buffer_offset_alignment, "") \ + X(MAX_TEXTURE_BUFFER_SIZE, max_texture_buffer_size, "") \ + X(MAX_TEXTURE_SIZE, max_2d_texture_dim, "") \ + X(MAX_3D_TEXTURE_SIZE, max_3d_texture_dim, "") \ + X(MAX_SHADER_STORAGE_BLOCK_SIZE, max_ssbo_size, "") \ + X(MAX_COMPUTE_SHARED_MEMORY_SIZE, max_shared_memory_size, "") \ + X(MAX_UNIFORM_BLOCK_SIZE, max_ubo_size, "") \ + X(MAX_SERVER_WAIT_TIMEOUT, max_server_wait_time, " [ns]") \ + +typedef struct { + GLVendorID vendor_id; + #define X(glname, name, suffix) i32 name; + GL_PARAMETERS + #undef X +} GLParameters; + +DEBUG_IMPORT GLParameters gl_parameters; + #endif /* _OPENGL_H_*/ diff --git a/static.c b/static.c @@ -94,82 +94,83 @@ gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, co } function void -get_gl_params(GLParameters *gl, Stream *err) +load_gl(Stream *err) { - char *vendor = (char *)glGetString(GL_VENDOR); - if (!vendor) { - stream_append_s8(err, s8("Failed to determine GL Vendor\n")); - os_fatal(stream_to_s8(err)); - } - /* TODO(rnp): str prefix of */ - switch (vendor[0]) { - case 'A': gl->vendor_id = GLVendor_AMD; break; - case 'I': gl->vendor_id = GLVendor_Intel; break; - case 'N': gl->vendor_id = GLVendor_NVIDIA; break; - /* NOTE(rnp): freedreno */ - case 'f': gl->vendor_id = GLVendor_ARM; break; - /* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */ - 
case 'M': gl->vendor_id = GLVendor_ARM; break; - default: - stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n")); - os_fatal(stream_to_s8(err)); - } - - #define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl->name); - GL_PARAMETERS + #define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name); + OGLProcedureList #undef X -} -function void -validate_gl_requirements(GLParameters *gl, Arena a) -{ - Stream s = arena_stream(a); + /* NOTE: Gather information about the GPU */ + { + char *vendor = (char *)glGetString(GL_VENDOR); + if (!vendor) { + stream_append_s8(err, s8("Failed to determine GL Vendor\n")); + os_fatal(stream_to_s8(err)); + } + /* TODO(rnp): str prefix of */ + switch (vendor[0]) { + case 'A': gl_parameters.vendor_id = GLVendor_AMD; break; + case 'I': gl_parameters.vendor_id = GLVendor_Intel; break; + case 'N': gl_parameters.vendor_id = GLVendor_NVIDIA; break; + /* NOTE(rnp): freedreno */ + case 'f': gl_parameters.vendor_id = GLVendor_ARM; break; + /* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */ + case 'M': gl_parameters.vendor_id = GLVendor_ARM; break; + default: + stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n")); + os_fatal(stream_to_s8(err)); + } - if (gl->max_ubo_size < (i32)sizeof(BeamformerParameters)) { - stream_append_s8(&s, s8("GPU must support UBOs of at least ")); - stream_append_i64(&s, sizeof(BeamformerParameters)); - stream_append_s8(&s, s8(" bytes!\n")); + #define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl_parameters.name); + GL_PARAMETERS + #undef X } - #define X(name, ret, params) if (!name) stream_append_s8s(&s, s8("missing required GL function:"), s8(#name), s8("\n")); - OGLProcedureList - #undef X - - if (s.widx) os_fatal(stream_to_s8(&s)); -} - -function void -dump_gl_params(GLParameters *gl, Arena a) -{ #ifdef _DEBUG - s8 vendor = s8("vendor:"); - i32 max_width = (i32)vendor.len; - 
#define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len; - GL_PARAMETERS - #undef X - max_width++; - - Stream s = arena_stream(a); - stream_append_s8s(&s, s8("---- GL Parameters ----\n"), vendor); - stream_pad(&s, ' ', max_width - (i32)vendor.len); - switch (gl->vendor_id) { - case GLVendor_AMD: stream_append_s8(&s, s8("AMD\n")); break; - case GLVendor_ARM: stream_append_s8(&s, s8("ARM\n")); break; - case GLVendor_Intel: stream_append_s8(&s, s8("Intel\n")); break; - case GLVendor_NVIDIA: stream_append_s8(&s, s8("nVidia\n")); break; + { + s8 vendor = s8("vendor:"); + i32 max_width = (i32)vendor.len; + #define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len; + GL_PARAMETERS + #undef X + max_width++; + + stream_append_s8s(err, s8("---- GL Parameters ----\n"), vendor); + stream_pad(err, ' ', max_width - (i32)vendor.len); + switch (gl_parameters.vendor_id) { + case GLVendor_AMD: stream_append_s8(err, s8("AMD")); break; + case GLVendor_ARM: stream_append_s8(err, s8("ARM")); break; + case GLVendor_Intel: stream_append_s8(err, s8("Intel")); break; + case GLVendor_NVIDIA: stream_append_s8(err, s8("nVidia")); break; + } + stream_append_byte(err, '\n'); + + #define X(glname, name, suffix) \ + stream_append_s8(err, s8(#name ":")); \ + stream_pad(err, ' ', max_width - (i32)s8(#name ":").len); \ + stream_append_i64(err, gl_parameters.name); \ + stream_append_s8(err, s8(suffix "\n")); + GL_PARAMETERS + #undef X + stream_append_s8(err, s8("-----------------------\n")); + os_write_file(os_error_handle(), stream_to_s8(err)); } - - #define X(glname, name, suffix) \ - stream_append_s8(&s, s8(#name ":")); \ - stream_pad(&s, ' ', max_width - (i32)s8(#name ":").len); \ - stream_append_i64(&s, gl->name); \ - stream_append_s8(&s, s8(suffix)); \ - stream_append_byte(&s, '\n'); - GL_PARAMETERS - #undef X - stream_append_s8(&s, s8("-----------------------\n")); - os_write_file(os_error_handle(), 
stream_to_s8(&s)); #endif + + { + stream_reset(err, 0); + if (gl_parameters.max_ubo_size < (i32)sizeof(BeamformerParameters)) { + stream_append_s8(err, s8("GPU must support UBOs of at least ")); + stream_append_i64(err, sizeof(BeamformerParameters)); + stream_append_s8(err, s8(" bytes!\n")); + } + + #define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n")); + OGLProcedureList + #undef X + + if (err->widx) os_fatal(stream_to_s8(err)); + } } function FILE_WATCH_CALLBACK_FN(reload_shader) @@ -202,11 +203,10 @@ function FILE_WATCH_CALLBACK_FN(load_cuda_library) { local_persist void *cuda_library_handle; - GLParameters *gl = (typeof(gl))user_data; /* TODO(rnp): (25.10.30) registering the rf buffer with CUDA is currently * causing a major performance regression. for now we are disabling its use * altogether. it will be reenabled once the issue can be fixed */ - b32 result = 0 && gl->vendor_id == GLVendor_NVIDIA && os_file_exists((c8 *)path.data); + b32 result = 0 && gl_parameters.vendor_id == GLVendor_NVIDIA && os_file_exists((c8 *)path.data); if (result) { Stream err = arena_stream(arena); @@ -363,13 +363,7 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input glfwWindowHint(GLFW_VISIBLE, 0); iptr raylib_window_handle = (iptr)GetPlatformWindowHandle(); - #define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name); - OGLProcedureList - #undef X - /* NOTE: Gather information about the GPU */ - get_gl_params(&ctx->gl, &ctx->error_stream); - dump_gl_params(&ctx->gl, *memory); - validate_gl_requirements(&ctx->gl, *memory); + load_gl(&ctx->error_stream); ctx->beamform_work_queue = push_struct(memory, BeamformWorkQueue); ctx->compute_shader_stats = push_struct(memory, ComputeShaderStats); @@ -403,16 +397,14 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input upctx->shared_memory = &ctx->shared_memory; upctx->compute_timing_table = 
ctx->compute_timing_table; upctx->compute_worker_sync = &ctx->compute_worker.sync_variable; - upctx->gl = &ctx->gl; upload->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle); upload->handle = os_create_thread((iptr)upload, beamformer_upload_entry_point); os_set_thread_name(worker->handle, s8("[upload]")); glfwMakeContextCurrent(raylib_window_handle); - if (load_cuda_library(s8(OS_CUDA_LIB_NAME), (iptr)&ctx->gl, *memory)) - os_add_file_watch(&ctx->file_watch_list, memory, s8(OS_CUDA_LIB_NAME), - load_cuda_library, (iptr)&ctx->gl); + if (load_cuda_library(s8(OS_CUDA_LIB_NAME), 0, *memory)) + os_add_file_watch(&ctx->file_watch_list, memory, s8(OS_CUDA_LIB_NAME), load_cuda_library, 0); /* NOTE: set up OpenGL debug logging */ Stream *gl_error_stream = push_struct(memory, Stream); @@ -445,7 +437,7 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer")); glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers); - i32 msaa_samples = ctx->gl.vendor_id == GLVendor_ARM? 4 : 8; + i32 msaa_samples = gl_parameters.vendor_id == GLVendor_ARM? 4 : 8; glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8, FRAME_VIEW_RENDER_TARGET_SIZE); glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24, diff --git a/ui.c b/ui.c @@ -1468,7 +1468,7 @@ ui_beamformer_frame_view_copy_frame(BeamformerUI *ui, BeamformerFrameView *new, mem_copy(new->frame, old->frame, sizeof(*new->frame)); new->frame->texture = 0; new->frame->next = 0; - alloc_beamform_frame(0, new->frame, old->frame->dim, old->frame->gl_kind, s8("Frame Copy: "), ui->arena); + alloc_beamform_frame(new->frame, old->frame->dim, old->frame->gl_kind, s8("Frame Copy: "), ui->arena); glCopyImageSubData(old->frame->texture, GL_TEXTURE_3D, 0, 0, 0, 0, new->frame->texture, GL_TEXTURE_3D, 0, 0, 0, 0,