Commit: 424ac3effa64f7cb5120ffaa594120621920578f
Parent: 8741f0be4c6ceab20d7fa32cb71da28a825b9f91
Author: Randy Palamar
Date: Wed, 14 Aug 2024 16:10:19 -0600
use bindless buffers where possible
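This requires OpenGL 4.5 (the direct state access entry points
glCreateBuffers/glNamedBufferStorage/glNamedBufferSubData), but at least
on NVIDIA it is significantly faster than the old bind-then-modify method.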
Diffstat:
3 files changed, 11 insertions(+), 25 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -67,7 +67,7 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
ctx->csctx.dec_data_dim = dec_data_dim;
glDeleteBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos);
- glGenBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos);
+ glCreateBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos);
i32 storage_flags = GL_DYNAMIC_STORAGE_BIT;
if (ctx->gl_vendor_id == GL_VENDOR_INTEL)
@@ -81,20 +81,19 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
if (cs->raw_data_arena.beg == 0)
cs->raw_data_arena = os_new_arena(rf_raw_size);
- for (u32 i = 0; i < ARRAY_COUNT(cs->rf_data_ssbos); i++) {
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, cs->rf_data_ssbos[i]);
- glBufferStorage(GL_SHADER_STORAGE_BUFFER, rf_decoded_size, 0, 0);
- }
+ for (u32 i = 0; i < ARRAY_COUNT(cs->rf_data_ssbos); i++)
+ glNamedBufferStorage(cs->rf_data_ssbos[i], rf_decoded_size, 0, 0);
/* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */
cs->hadamard_dim = (uv2){.x = dec_data_dim.z, .y = dec_data_dim.z};
size hadamard_elements = dec_data_dim.z * dec_data_dim.z;
i32 *hadamard = alloc(&a, i32, hadamard_elements);
fill_hadamard(hadamard, dec_data_dim.z);
+ glDeleteBuffers(1, &cs->hadamard_ssbo);
+ glCreateBuffers(1, &cs->hadamard_ssbo);
+ glNamedBufferStorage(cs->hadamard_ssbo, hadamard_elements * sizeof(i32), hadamard, 0);
- rlUnloadShaderBuffer(cs->hadamard_ssbo);
- cs->hadamard_ssbo = rlLoadShaderBuffer(hadamard_elements * sizeof(i32), hadamard, GL_STATIC_DRAW);
- ctx->flags &= ~ALLOC_SSBOS;
+ ctx->flags &= ~ALLOC_SSBOS;
}
static void
@@ -593,11 +592,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
if (ctx->flags & DO_COMPUTE) {
if (ctx->params->upload) {
- glBindBuffer(GL_UNIFORM_BUFFER, ctx->csctx.shared_ubo);
- void *ubo = glMapBuffer(GL_UNIFORM_BUFFER, GL_WRITE_ONLY);
- mem_copy((s8){.data = (u8 *)bp, .len = sizeof(*bp)},
- (s8){.data = (u8 *)ubo, .len = sizeof(*bp)});
- glUnmapBuffer(GL_UNIFORM_BUFFER);
+ glNamedBufferSubData(ctx->csctx.shared_ubo, 0, sizeof(*bp), bp);
ctx->params->upload = 0;
}
do_compute_shader(ctx, CS_HADAMARD);
diff --git a/main.c b/main.c
@@ -204,10 +204,9 @@ main(void)
glEnable(GL_DEBUG_OUTPUT);
#endif
- /* NOTE: allocate space for Uniform Buffer Object but don't send anything yet */
- glGenBuffers(1, &ctx.csctx.shared_ubo);
- glBindBuffer(GL_UNIFORM_BUFFER, ctx.csctx.shared_ubo);
- glBufferStorage(GL_UNIFORM_BUFFER, sizeof(BeamformerParameters), 0, GL_MAP_WRITE_BIT);
+ /* NOTE: allocate space for Uniform Buffer but don't send anything yet */
+ glCreateBuffers(1, &ctx.csctx.shared_ubo);
+ glNamedBufferStorage(ctx.csctx.shared_ubo, sizeof(BeamformerParameters), 0, GL_DYNAMIC_STORAGE_BIT);
glGenQueries(CS_LAST, ctx.csctx.timer_ids);
diff --git a/util.c b/util.c
@@ -15,14 +15,6 @@ die(char *fmt, ...)
exit(1);
}
-static void
-mem_copy(s8 src, s8 dest)
-{
- ASSERT(src.len <= dest.len);
- for (size i = 0; i < src.len; i++)
- dest.data[i] = src.data[i];
-}
-
static void *
mem_clear(u8 *p, u8 c, size len)
{