Commit: 6ba7f488631b2640c276ea51212688fe5192e1df
Parent: c268a6bc01452e94e82a881f1650d52ab0805dba
Author: Randy Palamar
Date: Fri, 14 Nov 2025 05:35:04 -0700
core: make gl_parameters into a global
gl_parameters is filled in once at startup and is read-only afterwards,
so there is no need to waste registers passing a pointer to it around.
Diffstat:
| M | beamformer.c | | | 21 | ++++++++------------- |
| M | beamformer.h | | | 30 | ------------------------------ |
| M | opengl.h | | | 29 | +++++++++++++++++++++++++++++ |
| M | static.c | | | 154 | +++++++++++++++++++++++++++++++++++++------------------------------------------ |
| M | ui.c | | | 2 | +- |
5 files changed, 111 insertions(+), 125 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -202,21 +202,16 @@ function iv3
make_valid_output_points(i32 points[3])
{
iv3 result;
- result.E[0] = MAX(1, points[0]);
- result.E[1] = MAX(1, points[1]);
- result.E[2] = MAX(1, points[2]);
+ result.E[0] = CLAMP(points[0], 1, gl_parameters.max_3d_texture_dim);
+ result.E[1] = CLAMP(points[1], 1, gl_parameters.max_3d_texture_dim);
+ result.E[2] = CLAMP(points[2], 1, gl_parameters.max_3d_texture_dim);
return result;
}
function void
-alloc_beamform_frame(GLParameters *gp, BeamformerFrame *out, iv3 out_dim, GLenum gl_kind, s8 name, Arena arena)
+alloc_beamform_frame(BeamformerFrame *out, iv3 out_dim, GLenum gl_kind, s8 name, Arena arena)
{
out->dim = make_valid_output_points(out_dim.E);
- if (gp) {
- out->dim.x = MIN(out->dim.x, gp->max_3d_texture_dim);
- out->dim.y = MIN(out->dim.y, gp->max_3d_texture_dim);
- out->dim.z = MIN(out->dim.z, gp->max_3d_texture_dim);
- }
/* NOTE: allocate storage for beamformed output data;
* this is shared between compute and fragment shaders */
@@ -859,8 +854,8 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp,
GLenum gl_kind = cp->iq_pipeline ? GL_RG32F : GL_R32F;
if (cp->average_frames > 1 && !beamformer_frame_compatible(ctx->averaged_frames + 0, cp->output_points, gl_kind)) {
- alloc_beamform_frame(&ctx->gl, ctx->averaged_frames + 0, cp->output_points, gl_kind, s8("Averaged Frame"), arena);
- alloc_beamform_frame(&ctx->gl, ctx->averaged_frames + 1, cp->output_points, gl_kind, s8("Averaged Frame"), arena);
+ alloc_beamform_frame(ctx->averaged_frames + 0, cp->output_points, gl_kind, s8("Averaged Frame"), arena);
+ alloc_beamform_frame(ctx->averaged_frames + 1, cp->output_points, gl_kind, s8("Averaged Frame"), arena);
}
}break;
case BeamformerParameterBlockRegion_ChannelMapping:{
@@ -1236,7 +1231,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
GLenum gl_kind = cp->iq_pipeline ? GL_RG32F : GL_R32F;
if (!beamformer_frame_compatible(frame, cp->output_points, gl_kind))
- alloc_beamform_frame(&ctx->gl, frame, cp->output_points, gl_kind, s8("Beamformed_Data"), *arena);
+ alloc_beamform_frame(frame, cp->output_points, gl_kind, s8("Beamformed_Data"), *arena);
frame->min_coordinate = cp->min_coordinate;
frame->max_coordinate = cp->max_coordinate;
@@ -1428,7 +1423,7 @@ DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload)
BeamformerParameters *bp = &b->parameters;
BeamformerDataKind data_kind = b->pipeline.data_kind;
- b32 nvidia = ctx->gl->vendor_id == GLVendor_NVIDIA;
+ b32 nvidia = gl_parameters.vendor_id == GLVendor_NVIDIA;
rf->active_rf_size = (u32)round_up_to(rf_block_rf_size & 0xFFFFFFFFULL, 64);
if (rf->size < rf->active_rf_size)
diff --git a/beamformer.h b/beamformer.h
@@ -90,33 +90,6 @@ typedef struct {
#include "beamformer_parameters.h"
#include "beamformer_shared_memory.c"
-typedef enum {
- GLVendor_AMD,
- GLVendor_ARM,
- GLVendor_Intel,
- GLVendor_NVIDIA,
-} GLVendorID;
-
-#define GL_PARAMETERS \
- X(MAJOR_VERSION, version_major, "") \
- X(MINOR_VERSION, version_minor, "") \
- X(MIN_MAP_BUFFER_ALIGNMENT, min_map_buffer_alignment, "") \
- X(TEXTURE_BUFFER_OFFSET_ALIGNMENT, texture_buffer_offset_alignment, "") \
- X(MAX_TEXTURE_BUFFER_SIZE, max_texture_buffer_size, "") \
- X(MAX_TEXTURE_SIZE, max_2d_texture_dim, "") \
- X(MAX_3D_TEXTURE_SIZE, max_3d_texture_dim, "") \
- X(MAX_SHADER_STORAGE_BLOCK_SIZE, max_ssbo_size, "") \
- X(MAX_COMPUTE_SHARED_MEMORY_SIZE, max_shared_memory_size, "") \
- X(MAX_UNIFORM_BLOCK_SIZE, max_ubo_size, "") \
- X(MAX_SERVER_WAIT_TIMEOUT, max_server_wait_time, " [ns]") \
-
-typedef struct {
- GLVendorID vendor_id;
- #define X(glname, name, suffix) i32 name;
- GL_PARAMETERS
- #undef X
-} GLParameters;
-
typedef struct {
iptr elements_offset;
i32 elements;
@@ -290,7 +263,6 @@ typedef struct {
SharedMemoryRegion *shared_memory;
ComputeTimingTable *compute_timing_table;
i32 *compute_worker_sync;
- GLParameters *gl;
} BeamformerUploadThreadContext;
struct BeamformerFrame {
@@ -317,8 +289,6 @@ struct BeamformerFrame {
};
typedef struct {
- GLParameters gl;
-
iv2 window_size;
b32 should_exit;
diff --git a/opengl.h b/opengl.h
@@ -160,4 +160,33 @@ OGLProcedureList
OGLProcedureList
#undef X
+typedef enum {
+ GLVendor_AMD,
+ GLVendor_ARM,
+ GLVendor_Intel,
+ GLVendor_NVIDIA,
+} GLVendorID;
+
+#define GL_PARAMETERS \
+ X(MAJOR_VERSION, version_major, "") \
+ X(MINOR_VERSION, version_minor, "") \
+ X(MIN_MAP_BUFFER_ALIGNMENT, min_map_buffer_alignment, "") \
+ X(TEXTURE_BUFFER_OFFSET_ALIGNMENT, texture_buffer_offset_alignment, "") \
+ X(MAX_TEXTURE_BUFFER_SIZE, max_texture_buffer_size, "") \
+ X(MAX_TEXTURE_SIZE, max_2d_texture_dim, "") \
+ X(MAX_3D_TEXTURE_SIZE, max_3d_texture_dim, "") \
+ X(MAX_SHADER_STORAGE_BLOCK_SIZE, max_ssbo_size, "") \
+ X(MAX_COMPUTE_SHARED_MEMORY_SIZE, max_shared_memory_size, "") \
+ X(MAX_UNIFORM_BLOCK_SIZE, max_ubo_size, "") \
+ X(MAX_SERVER_WAIT_TIMEOUT, max_server_wait_time, " [ns]") \
+
+typedef struct {
+ GLVendorID vendor_id;
+ #define X(glname, name, suffix) i32 name;
+ GL_PARAMETERS
+ #undef X
+} GLParameters;
+
+DEBUG_IMPORT GLParameters gl_parameters;
+
#endif /* _OPENGL_H_*/
diff --git a/static.c b/static.c
@@ -94,82 +94,83 @@ gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, co
}
function void
-get_gl_params(GLParameters *gl, Stream *err)
+load_gl(Stream *err)
{
- char *vendor = (char *)glGetString(GL_VENDOR);
- if (!vendor) {
- stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
- os_fatal(stream_to_s8(err));
- }
- /* TODO(rnp): str prefix of */
- switch (vendor[0]) {
- case 'A': gl->vendor_id = GLVendor_AMD; break;
- case 'I': gl->vendor_id = GLVendor_Intel; break;
- case 'N': gl->vendor_id = GLVendor_NVIDIA; break;
- /* NOTE(rnp): freedreno */
- case 'f': gl->vendor_id = GLVendor_ARM; break;
- /* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
- case 'M': gl->vendor_id = GLVendor_ARM; break;
- default:
- stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
- os_fatal(stream_to_s8(err));
- }
-
- #define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl->name);
- GL_PARAMETERS
+ #define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name);
+ OGLProcedureList
#undef X
-}
-function void
-validate_gl_requirements(GLParameters *gl, Arena a)
-{
- Stream s = arena_stream(a);
+ /* NOTE: Gather information about the GPU */
+ {
+ char *vendor = (char *)glGetString(GL_VENDOR);
+ if (!vendor) {
+ stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
+ os_fatal(stream_to_s8(err));
+ }
+ /* TODO(rnp): str prefix of */
+ switch (vendor[0]) {
+ case 'A': gl_parameters.vendor_id = GLVendor_AMD; break;
+ case 'I': gl_parameters.vendor_id = GLVendor_Intel; break;
+ case 'N': gl_parameters.vendor_id = GLVendor_NVIDIA; break;
+ /* NOTE(rnp): freedreno */
+ case 'f': gl_parameters.vendor_id = GLVendor_ARM; break;
+ /* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
+ case 'M': gl_parameters.vendor_id = GLVendor_ARM; break;
+ default:
+ stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
+ os_fatal(stream_to_s8(err));
+ }
- if (gl->max_ubo_size < (i32)sizeof(BeamformerParameters)) {
- stream_append_s8(&s, s8("GPU must support UBOs of at least "));
- stream_append_i64(&s, sizeof(BeamformerParameters));
- stream_append_s8(&s, s8(" bytes!\n"));
+ #define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl_parameters.name);
+ GL_PARAMETERS
+ #undef X
}
- #define X(name, ret, params) if (!name) stream_append_s8s(&s, s8("missing required GL function:"), s8(#name), s8("\n"));
- OGLProcedureList
- #undef X
-
- if (s.widx) os_fatal(stream_to_s8(&s));
-}
-
-function void
-dump_gl_params(GLParameters *gl, Arena a)
-{
#ifdef _DEBUG
- s8 vendor = s8("vendor:");
- i32 max_width = (i32)vendor.len;
- #define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len;
- GL_PARAMETERS
- #undef X
- max_width++;
-
- Stream s = arena_stream(a);
- stream_append_s8s(&s, s8("---- GL Parameters ----\n"), vendor);
- stream_pad(&s, ' ', max_width - (i32)vendor.len);
- switch (gl->vendor_id) {
- case GLVendor_AMD: stream_append_s8(&s, s8("AMD\n")); break;
- case GLVendor_ARM: stream_append_s8(&s, s8("ARM\n")); break;
- case GLVendor_Intel: stream_append_s8(&s, s8("Intel\n")); break;
- case GLVendor_NVIDIA: stream_append_s8(&s, s8("nVidia\n")); break;
+ {
+ s8 vendor = s8("vendor:");
+ i32 max_width = (i32)vendor.len;
+ #define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len;
+ GL_PARAMETERS
+ #undef X
+ max_width++;
+
+ stream_append_s8s(err, s8("---- GL Parameters ----\n"), vendor);
+ stream_pad(err, ' ', max_width - (i32)vendor.len);
+ switch (gl_parameters.vendor_id) {
+ case GLVendor_AMD: stream_append_s8(err, s8("AMD")); break;
+ case GLVendor_ARM: stream_append_s8(err, s8("ARM")); break;
+ case GLVendor_Intel: stream_append_s8(err, s8("Intel")); break;
+ case GLVendor_NVIDIA: stream_append_s8(err, s8("nVidia")); break;
+ }
+ stream_append_byte(err, '\n');
+
+ #define X(glname, name, suffix) \
+ stream_append_s8(err, s8(#name ":")); \
+ stream_pad(err, ' ', max_width - (i32)s8(#name ":").len); \
+ stream_append_i64(err, gl_parameters.name); \
+ stream_append_s8(err, s8(suffix "\n"));
+ GL_PARAMETERS
+ #undef X
+ stream_append_s8(err, s8("-----------------------\n"));
+ os_write_file(os_error_handle(), stream_to_s8(err));
}
-
- #define X(glname, name, suffix) \
- stream_append_s8(&s, s8(#name ":")); \
- stream_pad(&s, ' ', max_width - (i32)s8(#name ":").len); \
- stream_append_i64(&s, gl->name); \
- stream_append_s8(&s, s8(suffix)); \
- stream_append_byte(&s, '\n');
- GL_PARAMETERS
- #undef X
- stream_append_s8(&s, s8("-----------------------\n"));
- os_write_file(os_error_handle(), stream_to_s8(&s));
#endif
+
+ {
+ stream_reset(err, 0);
+ if (gl_parameters.max_ubo_size < (i32)sizeof(BeamformerParameters)) {
+ stream_append_s8(err, s8("GPU must support UBOs of at least "));
+ stream_append_i64(err, sizeof(BeamformerParameters));
+ stream_append_s8(err, s8(" bytes!\n"));
+ }
+
+ #define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n"));
+ OGLProcedureList
+ #undef X
+
+ if (err->widx) os_fatal(stream_to_s8(err));
+ }
}
function FILE_WATCH_CALLBACK_FN(reload_shader)
@@ -202,11 +203,10 @@ function FILE_WATCH_CALLBACK_FN(load_cuda_library)
{
local_persist void *cuda_library_handle;
- GLParameters *gl = (typeof(gl))user_data;
/* TODO(rnp): (25.10.30) registering the rf buffer with CUDA is currently
* causing a major performance regression. for now we are disabling its use
* altogether. it will be reenabled once the issue can be fixed */
- b32 result = 0 && gl->vendor_id == GLVendor_NVIDIA && os_file_exists((c8 *)path.data);
+ b32 result = 0 && gl_parameters.vendor_id == GLVendor_NVIDIA && os_file_exists((c8 *)path.data);
if (result) {
Stream err = arena_stream(arena);
@@ -363,13 +363,7 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
glfwWindowHint(GLFW_VISIBLE, 0);
iptr raylib_window_handle = (iptr)GetPlatformWindowHandle();
- #define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name);
- OGLProcedureList
- #undef X
- /* NOTE: Gather information about the GPU */
- get_gl_params(&ctx->gl, &ctx->error_stream);
- dump_gl_params(&ctx->gl, *memory);
- validate_gl_requirements(&ctx->gl, *memory);
+ load_gl(&ctx->error_stream);
ctx->beamform_work_queue = push_struct(memory, BeamformWorkQueue);
ctx->compute_shader_stats = push_struct(memory, ComputeShaderStats);
@@ -403,16 +397,14 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
upctx->shared_memory = &ctx->shared_memory;
upctx->compute_timing_table = ctx->compute_timing_table;
upctx->compute_worker_sync = &ctx->compute_worker.sync_variable;
- upctx->gl = &ctx->gl;
upload->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
upload->handle = os_create_thread((iptr)upload, beamformer_upload_entry_point);
os_set_thread_name(worker->handle, s8("[upload]"));
glfwMakeContextCurrent(raylib_window_handle);
- if (load_cuda_library(s8(OS_CUDA_LIB_NAME), (iptr)&ctx->gl, *memory))
- os_add_file_watch(&ctx->file_watch_list, memory, s8(OS_CUDA_LIB_NAME),
- load_cuda_library, (iptr)&ctx->gl);
+ if (load_cuda_library(s8(OS_CUDA_LIB_NAME), 0, *memory))
+ os_add_file_watch(&ctx->file_watch_list, memory, s8(OS_CUDA_LIB_NAME), load_cuda_library, 0);
/* NOTE: set up OpenGL debug logging */
Stream *gl_error_stream = push_struct(memory, Stream);
@@ -445,7 +437,7 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer"));
glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers);
- i32 msaa_samples = ctx->gl.vendor_id == GLVendor_ARM? 4 : 8;
+ i32 msaa_samples = gl_parameters.vendor_id == GLVendor_ARM? 4 : 8;
glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8,
FRAME_VIEW_RENDER_TARGET_SIZE);
glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
diff --git a/ui.c b/ui.c
@@ -1468,7 +1468,7 @@ ui_beamformer_frame_view_copy_frame(BeamformerUI *ui, BeamformerFrameView *new,
mem_copy(new->frame, old->frame, sizeof(*new->frame));
new->frame->texture = 0;
new->frame->next = 0;
- alloc_beamform_frame(0, new->frame, old->frame->dim, old->frame->gl_kind, s8("Frame Copy: "), ui->arena);
+ alloc_beamform_frame(new->frame, old->frame->dim, old->frame->gl_kind, s8("Frame Copy: "), ui->arena);
glCopyImageSubData(old->frame->texture, GL_TEXTURE_3D, 0, 0, 0, 0,
new->frame->texture, GL_TEXTURE_3D, 0, 0, 0, 0,