Commit: 2733f538f1d90d77c92fc253629ecfb027f610ec
Parent: 66d6d9e5f8d9d3bded52fcb3bc189f209050dfa9
Author: Randy Palamar
Date: Sun, 8 Sep 2024 14:42:31 -0600
record the time needed for volume computation
Diffstat:
4 files changed, 51 insertions(+), 21 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -111,7 +111,13 @@ do_volume_computation_step(BeamformerCtx *ctx, enum compute_shaders shader)
b32 done = 0;
- /* TODO: volume computation running timer */
+ /* NOTE: for the first dispatch the timer is started elsewhere so that it can include
+ * times such as decoding/demodulation/etc.; it is only started here if not yet active */
+ if (!(e->state & ES_TIMER_ACTIVE)) {
+ glQueryCounter(e->timer_ids[0], GL_TIMESTAMP);
+ e->state |= ES_TIMER_ACTIVE;
+ }
+
glUseProgram(cs->programs[shader]);
glBindBufferBase(GL_UNIFORM_BUFFER, 0, cs->shared_ubo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, e->rf_data_ssbo);
@@ -129,10 +135,13 @@ do_volume_computation_step(BeamformerCtx *ctx, enum compute_shaders shader)
glUniform3iv(cs->volume_export_dim_offset_id, 1, (i32 *)dim_offset.E);
glDispatchCompute(ORONE(e->volume_dim.x / 32), e->volume_dim.y, 1);
if (e->dispatch_index >= dispatch_count) {
- e->dispatch_index = 0;
- e->state = 0;
- done = 1;
+ e->dispatch_index = 0;
+ e->state &= ~ES_COMPUTING;
+ done = 1;
}
+
+ glQueryCounter(e->timer_ids[1], GL_TIMESTAMP);
+
return done;
}
@@ -205,8 +214,9 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
case CS_UFORCES:
if (ctx->export_ctx.state & ES_START) {
/* NOTE: on the first frame of compute make a copy of the rf data */
- size rf_size = decoded_data_size(csctx);
- ctx->export_ctx.state = ES_COMPUTING;
+ size rf_size = decoded_data_size(csctx);
+ ctx->export_ctx.state &= ~ES_START;
+ ctx->export_ctx.state |= ES_COMPUTING;
glCopyNamedBufferSubData(csctx->rf_data_ssbos[input_ssbo_idx],
ctx->export_ctx.rf_data_ssbo, 0, 0, rf_size);
}
@@ -230,8 +240,19 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
}
static void
-check_compute_timers(ComputeShaderCtx *cs, BeamformerParametersFull *bp)
+check_compute_timers(ComputeShaderCtx *cs, ExportCtx *e, BeamformerParametersFull *bp)
{
+ /* NOTE: volume generation running timer */
+ if (e->state & ES_TIMER_ACTIVE) {
+ u64 start_ns = 0, end_ns = 0;
+ glGetQueryObjectui64v(e->timer_ids[0], GL_QUERY_RESULT, &start_ns);
+ glGetQueryObjectui64v(e->timer_ids[1], GL_QUERY_RESULT, &end_ns);
+ u64 elapsed_ns = end_ns - start_ns;
+ e->runtime += (f32)elapsed_ns * 1e-9;
+ e->state &= ~ES_TIMER_ACTIVE;
+ }
+
+ /* NOTE: main timers for display portion of the program */
u32 last_idx = (cs->timer_index - 1) % ARRAY_COUNT(cs->timer_fences);
if (!cs->timer_fences[last_idx])
return;
@@ -261,7 +282,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
}
/* NOTE: Store the compute time for the last frame. */
- check_compute_timers(&ctx->csctx, ctx->params);
+ check_compute_timers(&ctx->csctx, &ctx->export_ctx, ctx->params);
BeamformerParameters *bp = &ctx->params->raw;
/* NOTE: Check for and Load RF Data into GPU */
@@ -308,7 +329,13 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
/* NOTE: we are starting a volume computation on this frame so make some space */
if (ctx->export_ctx.state & ES_START) {
ExportCtx *e = &ctx->export_ctx;
- uv4 edim = e->volume_dim;
+ e->runtime = 0;
+ uv4 edim = e->volume_dim;
+
+ /* NOTE: get a timestamp here which will include decoding/demodulating/etc. */
+ glQueryCounter(e->timer_ids[0], GL_TIMESTAMP);
+ e->state |= ES_TIMER_ACTIVE;
+
glDeleteTextures(1, &e->volume_texture);
glCreateTextures(GL_TEXTURE_3D, 1, &e->volume_texture);
glTextureStorage3D(e->volume_texture, 1, GL_R32F, edim.x, edim.y, edim.z);
@@ -341,10 +368,10 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
/* TODO: this could probably be adapted to do FORCES as well */
b32 done = do_volume_computation_step(ctx, CS_HERCULES);
if (done) {
- ExportCtx *e = &ctx->export_ctx;
- uv4 dim = e->volume_dim;
+ ExportCtx *e = &ctx->export_ctx;
+ uv4 dim = e->volume_dim;
size volume_out_size = dim.x * dim.y * dim.z * sizeof(f32);
- e->volume_buf = os_alloc_arena(e->volume_buf, volume_out_size);
+ e->volume_buf = os_alloc_arena(e->volume_buf, volume_out_size);
glGetTextureImage(e->volume_texture, 0, GL_RED, GL_FLOAT, volume_out_size,
e->volume_buf.beg);
s8 raw = {.len = volume_out_size, .data = e->volume_buf.beg};
diff --git a/beamformer.h b/beamformer.h
@@ -192,14 +192,16 @@ typedef struct {
} FragmentShaderCtx;
enum export_state {
- ES_START = (1 << 0),
- ES_COMPUTING = (1 << 1),
- ES_DONE = (1 << 2),
+ ES_START = (1 << 0),
+ ES_COMPUTING = (1 << 1),
+ ES_TIMER_ACTIVE = (1 << 2),
};
typedef struct {
Arena volume_buf;
uv4 volume_dim;
+ u32 timer_ids[2];
+ f32 runtime;
u32 volume_texture;
i32 volume_texture_id;
u32 rf_data_ssbo;
diff --git a/main.c b/main.c
@@ -234,6 +234,7 @@ main(void)
glNamedBufferStorage(ctx.csctx.shared_ubo, sizeof(BeamformerParameters), 0, GL_DYNAMIC_STORAGE_BIT);
glGenQueries(ARRAY_COUNT(ctx.csctx.timer_fences) * CS_LAST, (u32 *)ctx.csctx.timer_ids);
+ glGenQueries(ARRAY_COUNT(ctx.export_ctx.timer_ids), ctx.export_ctx.timer_ids);
/* NOTE: do not DO_COMPUTE on first frame */
reload_shaders(&ctx, temp_memory);
diff --git a/ui.c b/ui.c
@@ -573,14 +573,14 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r)
compute_time_sum += cs->last_frame_time[index];
}
- {
- s8 label = s8("Compute Total:");
- pos.y -= measure_text(ctx->font, label).y;
- draw_text(ctx->font, label, pos, 0, colour_from_normalized(FG_COLOUR));
+ static s8 totals[2] = {s8("Compute Total:"), s8("Volume Total:")};
+ f32 times[2] = {compute_time_sum, ctx->export_ctx.runtime};
+ for (u32 i = 0; i < ARRAY_COUNT(totals); i++) {
+ pos.y -= measure_text(ctx->font, totals[i]).y;
+ draw_text(ctx->font, totals[i], pos, 0, colour_from_normalized(FG_COLOUR));
s8 tmp = txt_buf;
- tmp.len = snprintf((char *)txt_buf.data, txt_buf.len, "%0.02e [s]",
- compute_time_sum);
+ tmp.len = snprintf((char *)txt_buf.data, txt_buf.len, "%0.02e [s]", times[i]);
v2 txt_fs = measure_text(ctx->font, tmp);
v2 rpos = {.x = r.pos.x + r.size.w - txt_fs.w, .y = pos.y};
draw_text(ctx->font, tmp, rpos, 0, colour_from_normalized(FG_COLOUR));