Commit: 16c104c39dd46f04a190170a2755fd612685dafc
Parent: 8b852907f91a6445a5a1b0ea20dcb192936dac28
Author: Randy Palamar
Date: Thu, 15 Aug 2024 07:30:13 -0600
fix compute timer race
The only reason this was working on the lab computer was because it was dog slow.
Diffstat:
2 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -502,6 +502,24 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r)
}
}
+/* NOTE: Reads back the per-stage GPU timer queries once the most recently issued
+ * timer fence has signaled, storing each result in cs->last_frame_time as seconds.
+ * Returns without touching last_frame_time when that fence slot is empty or the
+ * fence has not signaled yet. */
+static void
+check_compute_timers(ComputeShaderCtx *cs)
+{
+	/* NOTE: do_beamformer() stores a new fence at timer_index and then advances it,
+	 * so the most recently issued fence sits one slot behind timer_index. Add the
+	 * slot count before subtracting: in unsigned arithmetic (0 - 1) wraps to 2^32 - 1,
+	 * which is a multiple of 3 and would select slot 0 instead of the last slot. */
+	u32 fence_count = ARRAY_COUNT(cs->timer_fences);
+	u32 last_idx    = (cs->timer_index + fence_count - 1) % fence_count;
+	if (!cs->timer_fences[last_idx])
+		return;
+
+	/* NOTE: initialised so a failed query is treated as "not signaled" rather than
+	 * comparing an indeterminate value. */
+	i32 timer_status = GL_UNSIGNALED, _unused = 0;
+	/* NOTE(review): bufSize is passed as 4 although only a single i32 is provided;
+	 * GL_SYNC_STATUS yields one value - confirm whether 1 was intended. */
+	glGetSynciv(cs->timer_fences[last_idx], GL_SYNC_STATUS, 4, &_unused, &timer_status);
+	if (timer_status != GL_SIGNALED)
+		return;
+
+	for (u32 i = 0; i < ARRAY_COUNT(cs->timer_ids); i++) {
+		u64 ns = 0;
+		glGetQueryObjectui64v(cs->timer_ids[i], GL_QUERY_RESULT, &ns);
+		/* NOTE: query result is divided by 1e9 (nanoseconds -> seconds) */
+		cs->last_frame_time[i] = (f32)ns / 1e9;
+	}
+}
DEBUG_EXPORT void
do_beamformer(BeamformerCtx *ctx, Arena arena)
@@ -514,17 +532,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
}
/* NOTE: Store the compute time for the last frame. */
- if (ctx->csctx.timer_fence) {
- i32 timer_status, _unused;
- glGetSynciv(ctx->csctx.timer_fence, GL_SYNC_STATUS, 4, &_unused, &timer_status);
- if (timer_status == GL_SIGNALED) {
- for (u32 i = 0; i < ARRAY_COUNT(ctx->csctx.timer_ids); i++) {
- u64 ns = 0;
- glGetQueryObjectui64v(ctx->csctx.timer_ids[i], GL_QUERY_RESULT, &ns);
- ctx->csctx.last_frame_time[i] = (f32)ns / 1e9;
- }
- }
- }
+ check_compute_timers(&ctx->csctx);
BeamformerParameters *bp = &ctx->params->raw;
/* NOTE: Check for and Load RF Data into GPU */
@@ -577,8 +585,11 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
do_compute_shader(ctx, CS_MIN_MAX);
ctx->flags &= ~DO_COMPUTE;
ctx->flags |= GEN_MIPMAPS;
- glDeleteSync(ctx->csctx.timer_fence);
- ctx->csctx.timer_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+
+ u32 tidx = ctx->csctx.timer_index;
+ glDeleteSync(ctx->csctx.timer_fences[tidx]);
+ ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences);
}
/* NOTE: draw output image texture using render fragment shader */
diff --git a/beamformer.h b/beamformer.h
@@ -97,9 +97,9 @@ typedef struct {
typedef struct {
u32 programs[CS_LAST];
+ u32 timer_index;
u32 timer_ids[CS_LAST];
- i32 timer_idx;
- GLsync timer_fence;
+ GLsync timer_fences[3];
f32 last_frame_time[CS_LAST];
/* NOTE: the raw_data_ssbo is allocated at 3x the required size to allow for tiled