ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: 6b154dc205c1cdd2526dabb90bc584d59de8f72f
Parent: c8cb8921ac7ed0aa2c0ab18bbb0bb6c669c6d0c0
Author: Randy Palamar
Date:   Fri, 26 Jul 2024 13:45:49 -0600

add basic code for profiling compute shader stages

Diffstat:
M beamformer.c | 54 +++++++++++++++++++++++++++++++++++++++++++-----------
M beamformer.h | 45 +++++++++++++++++++++++++--------------------
M main.c | 2 ++
M util.h | 1 +
4 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -87,6 +87,9 @@ static void do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) { ComputeShaderCtx *csctx = &ctx->csctx; + + glBeginQuery(GL_TIME_ELAPSED, csctx->timer_ids[shader]); + glUseProgram(csctx->programs[shader]); glBindBufferBase(GL_UNIFORM_BUFFER, 0, csctx->shared_ubo); @@ -149,6 +152,8 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) break; default: ASSERT(0); } + + glEndQuery(GL_TIME_ELAPSED); } static Color @@ -243,7 +248,7 @@ draw_settings_ui(BeamformerCtx *ctx, Arena arena, f32 dt, Rect r, v2 mouse) v2 pos = r.pos; pos.y += 50; - pos.x += 10; + pos.x += 20; s8 txt = s8alloc(&arena, 64); @@ -342,7 +347,7 @@ draw_settings_ui(BeamformerCtx *ctx, Arena arena, f32 dt, Rect r, v2 mouse) } static void -draw_debug_overlay(BeamformerCtx *ctx, Arena arena, f32 dt) +draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r, f32 dt) { DrawFPS(20, 20); @@ -350,17 +355,29 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, f32 dt) u32 fontsize = ctx->font_size; u32 fontspace = ctx->font_spacing; - s8 partial_txt = s8alloc(&arena, 64); - snprintf((char *)partial_txt.data, partial_txt.len, "Partial Transfers: %u", ctx->partial_transfer_count); + static char *labels[CS_LAST] = { + [CS_HADAMARD] = "Decoding:", + [CS_LPF] = "LPF:", + [CS_MIN_MAX] = "Min/Max:", + [CS_UFORCES] = "UFORCES:", + }; - v2 partial_fs = {.rl = MeasureTextEx(ctx->font, (char *)partial_txt.data, fontsize, fontspace)}; + ComputeShaderCtx *cs = &ctx->csctx; - v2 pos = {.x = 20, .y = ws.h - partial_fs.y - 20}; - /* NOTE: Partial Tranfers */ - { - DrawTextEx(ctx->font, (char *)partial_txt.data, pos.rl, fontsize, fontspace, + s8 txt_buf = s8alloc(&arena, 64); + v2 pos = {.x = 20, .y = ws.h - 10}; + for (u32 i = 0; i < CS_LAST; i++) { + v2 txt_fs = {.rl = MeasureTextEx(ctx->font, labels[i], fontsize, fontspace)}; + pos.y -= txt_fs.y; + + DrawTextEx(ctx->font, labels[i], pos.rl, fontsize, fontspace, + 
colour_from_normalized(FG_COLOUR)); + + snprintf((char *)txt_buf.data, txt_buf.len, "%0.02e [s]", cs->last_frame_time[i]); + txt_fs.rl = MeasureTextEx(ctx->font, (char *)txt_buf.data, fontsize, fontspace); + v2 rpos = {.x = r.pos.x + r.size.w - txt_fs.w, .y = pos.y}; + DrawTextEx(ctx->font, (char *)txt_buf.data, rpos.rl, fontsize, fontspace, colour_from_normalized(FG_COLOUR)); - pos.y += partial_fs.y; } { @@ -404,6 +421,19 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) ctx->window_size.w = GetScreenWidth(); } + /* NOTE: Store the compute time for the last frame. */ + { + i32 timer_status, _unused; + glGetSynciv(ctx->csctx.timer_fence, GL_SYNC_STATUS, 4, &_unused, &timer_status); + if (timer_status == GL_SIGNALED) { + for (u32 i = 0; i < ARRAY_COUNT(ctx->csctx.timer_ids); i++) { + u64 ns = 0; + glGetQueryObjectui64v(ctx->csctx.timer_ids[i], GL_QUERY_RESULT, &ns); + ctx->csctx.last_frame_time[i] = (f32)ns / 1e9; + } + } + } + BeamformerParameters *bp = &ctx->params->raw; /* NOTE: Check for and Load RF Data into GPU */ if (os_poll_pipe(ctx->data_pipe)) { @@ -441,6 +471,8 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) do_compute_shader(ctx, CS_UFORCES); do_compute_shader(ctx, CS_MIN_MAX); ctx->flags &= ~DO_COMPUTE; + glDeleteSync(ctx->csctx.timer_fence); + ctx->csctx.timer_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } /* NOTE: draw output image texture using render fragment shader */ @@ -615,7 +647,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) } draw_settings_ui(ctx, arena, dt, lr, mouse); - draw_debug_overlay(ctx, arena, dt); + draw_debug_overlay(ctx, arena, lr, dt); EndDrawing(); if (IsKeyPressed(KEY_R)) diff --git a/beamformer.h b/beamformer.h @@ -54,9 +54,34 @@ enum program_flags { DO_COMPUTE = 1 << 30, }; +#include "beamformer_parameters.h" +typedef struct { + BeamformerParameters raw; + b32 upload; +} BeamformerParametersFull; + +#if defined(__unix__) + #include "os_unix.c" + + #define OS_PIPE_NAME "/tmp/beamformer_data_fifo" + #define 
OS_SMEM_NAME "/ogl_beamformer_parameters" +#elif defined(_WIN32) + #include "os_win32.c" + + #define OS_PIPE_NAME "\\\\.\\pipe\\beamformer_data_fifo" + #define OS_SMEM_NAME "Local\\ogl_beamformer_parameters" +#else + #error Unsupported Platform! +#endif + typedef struct { u32 programs[CS_LAST]; + u32 timer_ids[CS_LAST]; + i32 timer_idx; + GLsync timer_fence; + f32 last_frame_time[CS_LAST]; + /* NOTE: One SSBO for raw data and two for decoded data (swapped for chained stages)*/ u32 raw_data_ssbo; u32 rf_data_ssbos[2]; @@ -85,26 +110,6 @@ typedef struct { f32 db; } FragmentShaderCtx; -#include "beamformer_parameters.h" -typedef struct { - BeamformerParameters raw; - b32 upload; -} BeamformerParametersFull; - -#if defined(__unix__) - #include "os_unix.c" - - #define OS_PIPE_NAME "/tmp/beamformer_data_fifo" - #define OS_SMEM_NAME "/ogl_beamformer_parameters" -#elif defined(_WIN32) - #include "os_win32.c" - - #define OS_PIPE_NAME "\\\\.\\pipe\\beamformer_data_fifo" - #define OS_SMEM_NAME "Local\\ogl_beamformer_parameters" -#else - #error Unsupported Platform! -#endif - typedef struct { uv2 window_size; u32 flags; diff --git a/main.c b/main.c @@ -169,6 +169,8 @@ main(void) glBindBuffer(GL_UNIFORM_BUFFER, ctx.csctx.shared_ubo); glBufferData(GL_UNIFORM_BUFFER, sizeof(BeamformerParameters), 0, GL_STATIC_DRAW); + glGenQueries(CS_LAST, ctx.csctx.timer_ids); + ctx.flags |= RELOAD_SHADERS|ALLOC_SSBOS|ALLOC_OUT_TEX|UPLOAD_FILTER; while(!WindowShouldClose()) { diff --git a/util.h b/util.h @@ -31,6 +31,7 @@ typedef uint8_t u8; typedef int16_t i16; typedef int32_t i32; typedef uint32_t u32; +typedef uint64_t u64; typedef uint32_t b32; typedef float f32; typedef double f64;