Commit: 16c104c39dd46f04a190170a2755fd612685dafc
Parent: 8b852907f91a6445a5a1b0ea20dcb192936dac28
Author: Randy Palamar
Date:   Thu, 15 Aug 2024 07:30:13 -0600
fix compute timer race
The only reason this was working on the lab computer was because it was dog slow.
Diffstat:
2 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -502,6 +502,24 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r)
 	}
 }
 
+/* NOTE: Copy the GPU timer query results into cs->last_frame_time, but only once
+ * the most recently inserted timer fence has signaled. Leaves the stored times
+ * untouched when that fence slot is empty or has not signaled yet. */
+static void
+check_compute_timers(ComputeShaderCtx *cs)
+{
+	/* NOTE: timer_index is the slot that gets written next, so the newest fence is
+	 * one slot behind it. The count is added before subtracting because an unsigned
+	 * 0 - 1 wraps to UINT32_MAX, which is divisible by 3 and would pick slot 0
+	 * instead of the final slot. */
+	u32 fence_count = ARRAY_COUNT(cs->timer_fences);
+	u32 last_idx    = (cs->timer_index + fence_count - 1) % fence_count;
+	if (!cs->timer_fences[last_idx])
+		return;
+
+	/* NOTE: zero initialized so that a failed query (which writes nothing) is
+	 * treated as "not signaled" rather than reading an indeterminate value. */
+	i32 timer_status = 0, _unused = 0;
+	glGetSynciv(cs->timer_fences[last_idx], GL_SYNC_STATUS, 4, &_unused, &timer_status);
+	if (timer_status != GL_SIGNALED)
+		return;
+
+	/* NOTE: query results are reported in nanoseconds; store them as seconds. */
+	for (u32 i = 0; i < ARRAY_COUNT(cs->timer_ids); i++) {
+		u64 ns = 0;
+		glGetQueryObjectui64v(cs->timer_ids[i], GL_QUERY_RESULT, &ns);
+		cs->last_frame_time[i] = (f32)ns / 1e9;
+	}
+}
 
 DEBUG_EXPORT void
 do_beamformer(BeamformerCtx *ctx, Arena arena)
@@ -514,17 +532,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
 	}
 
 	/* NOTE: Store the compute time for the last frame. */
-	if (ctx->csctx.timer_fence) {
-		i32 timer_status, _unused;
-		glGetSynciv(ctx->csctx.timer_fence, GL_SYNC_STATUS, 4, &_unused, &timer_status);
-		if (timer_status == GL_SIGNALED) {
-			for (u32 i = 0; i < ARRAY_COUNT(ctx->csctx.timer_ids); i++) {
-				u64 ns = 0;
-				glGetQueryObjectui64v(ctx->csctx.timer_ids[i], GL_QUERY_RESULT, &ns);
-				ctx->csctx.last_frame_time[i] = (f32)ns / 1e9;
-			}
-		}
-	}
+	check_compute_timers(&ctx->csctx);
 
 	BeamformerParameters *bp = &ctx->params->raw;
 	/* NOTE: Check for and Load RF Data into GPU */
@@ -577,8 +585,11 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
 		do_compute_shader(ctx, CS_MIN_MAX);
 		ctx->flags &= ~DO_COMPUTE;
 		ctx->flags |= GEN_MIPMAPS;
-		glDeleteSync(ctx->csctx.timer_fence);
-		ctx->csctx.timer_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+
+		u32 tidx = ctx->csctx.timer_index;
+		glDeleteSync(ctx->csctx.timer_fences[tidx]);
+		ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+		ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences);
 	}
 
 	/* NOTE: draw output image texture using render fragment shader */
diff --git a/beamformer.h b/beamformer.h
@@ -97,9 +97,9 @@ typedef struct {
 typedef struct {
 	u32 programs[CS_LAST];
 
+	u32    timer_index;
 	u32    timer_ids[CS_LAST];
-	i32    timer_idx;
-	GLsync timer_fence;
+	GLsync timer_fences[3];
 	f32    last_frame_time[CS_LAST];
 
 	/* NOTE: the raw_data_ssbo is allocated at 3x the required size to allow for tiled