ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: d3576ea1a8951f6d2a601ca573563d1ecb5239f3
Parent: f22798d2a027f8a51d0187e0da7cd9451eed0cb9
Author: Randy Palamar
Date:   Sat,  7 Sep 2024 16:51:32 -0600

raw volume output dumping

This version is limited: the GL driver will kill the process if a single
compute dispatch takes too long, so large volumes cannot be dumped yet.
This will be fixed soon.

Diffstat:
Mbeamformer.c | 50+++++++++++++++++++++++++++++++++++++++++++-------
Mbeamformer.h | 13+++++++------
Mmain.c | 9++++++---
Mos_unix.c | 11+++++++++++
Mos_win32.c | 18++++++++++++++++++
Mshaders/2d_hercules.glsl | 16+++++++++++-----
Mui.c | 6++++--
7 files changed, 100 insertions(+), 23 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -31,8 +31,6 @@ alloc_output_image(BeamformerCtx *ctx) //SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_ANISOTROPIC_8X); //SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_TRILINEAR); SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_BILINEAR); - - ctx->flags &= ~ALLOC_OUT_TEX; } static void @@ -94,8 +92,6 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a) glDeleteBuffers(1, &cs->hadamard_ssbo); glCreateBuffers(1, &cs->hadamard_ssbo); glNamedBufferStorage(cs->hadamard_ssbo, hadamard_elements * sizeof(i32), hadamard, 0); - - ctx->flags &= ~ALLOC_SSBOS; } static void @@ -166,12 +162,30 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) break; case CS_HERCULES: case CS_UFORCES: + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]); + + /* NOTE: Do a volume computation before doing the normal display path */ + if (ctx->export_ctx.state & ES_START) { + /* TODO: for large data this must be split over multiple compute calls + * otherwise the GL driver will kill you */ + ExportCtx *e = &ctx->export_ctx; + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_3D, e->volume_texture); + glBindImageTexture(0, e->volume_texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R32F); + glUniform1i(e->volume_texture_id, 0); + glUniform1i(csctx->volume_export_pass_id, 1); + glDispatchCompute(ORONE(e->volume_dim.x / 32), + e->volume_dim.y, + ORONE(e->volume_dim.z / 32)); + ctx->export_ctx.state = ES_DONE; + } + + glUniform1i(csctx->volume_export_pass_id, 0); glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit); glBindTexture(GL_TEXTURE_3D, ctx->out_texture); glBindImageTexture(ctx->out_texture_unit, ctx->out_texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F); glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]); glDispatchCompute(ORONE(ctx->out_data_dim.x / 32), 
ctx->out_data_dim.y, ORONE(ctx->out_data_dim.z / 32)); @@ -220,10 +234,10 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) /* NOTE: Check for and Load RF Data into GPU */ if (os_poll_pipe(ctx->data_pipe)) { ComputeShaderCtx *cs = &ctx->csctx; - if (!uv4_equal(cs->dec_data_dim, bp->dec_data_dim) || ctx->flags & ALLOC_SSBOS) + if (!uv4_equal(cs->dec_data_dim, bp->dec_data_dim)) alloc_shader_storage(ctx, arena); - if (!uv4_equal(ctx->out_data_dim, bp->output_points) || ctx->flags & ALLOC_OUT_TEX) + if (!uv4_equal(ctx->out_data_dim, bp->output_points)) alloc_output_image(ctx); cs->raw_data_index = (cs->raw_data_index + 1) % ARRAY_COUNT(cs->raw_data_fences); @@ -263,6 +277,15 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) glNamedBufferSubData(ctx->csctx.shared_ubo, 0, sizeof(*bp), bp); ctx->params->upload = 0; } + + if (ctx->export_ctx.state & ES_START) { + uv4 edim = ctx->export_ctx.volume_dim; + glDeleteTextures(1, &ctx->export_ctx.volume_texture); + glCreateTextures(GL_TEXTURE_3D, 1, &ctx->export_ctx.volume_texture); + glTextureStorage3D(ctx->export_ctx.volume_texture, 1, GL_R32F, + edim.x, edim.y, edim.z); + } + u32 stages = ctx->params->compute_stages_count; for (u32 i = 0; i < stages; i++) { do_compute_shader(ctx, ctx->params->compute_stages[i]); @@ -274,6 +297,19 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) glDeleteSync(ctx->csctx.timer_fences[tidx]); ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences); + + if (ctx->export_ctx.state & ES_DONE) { + ExportCtx *e = &ctx->export_ctx; + uv4 dim = e->volume_dim; + size volume_out_size = dim.x * dim.y * dim.z * sizeof(f32); + e->volume_buf = os_alloc_arena(e->volume_buf, volume_out_size); + glGetTextureImage(e->volume_texture, 0, GL_RED, GL_FLOAT, volume_out_size, + e->volume_buf.beg); + s8 raw = {.len = volume_out_size, .data = e->volume_buf.beg}; + if (!os_write_file("raw_volume.bin", raw)) + 
TraceLog(LOG_WARNING, "failed to write output volume\n"); + ctx->export_ctx.state = 0; + } } /* NOTE: draw output image texture using render fragment shader */ diff --git a/beamformer.h b/beamformer.h @@ -49,8 +49,6 @@ typedef union { enum program_flags { RELOAD_SHADERS = 1 << 0, - ALLOC_SSBOS = 1 << 1, - ALLOC_OUT_TEX = 1 << 2, GEN_MIPMAPS = 1 << 29, DO_COMPUTE = 1 << 30, }; @@ -181,6 +179,7 @@ typedef struct { i32 out_data_tex_id; i32 mip_view_tex_id; i32 mips_level_id; + i32 volume_export_pass_id; } ComputeShaderCtx; typedef struct { @@ -198,10 +197,12 @@ enum export_state { }; typedef struct { - uv4 volume_dim; - u32 rf_data_ssbo; - u32 volume_texture; - u32 state; + Arena volume_buf; + uv4 volume_dim; + u32 volume_texture; + i32 volume_texture_id; + u32 output_ssbo; + u32 state; } ExportCtx; typedef struct { diff --git a/main.c b/main.c @@ -117,6 +117,11 @@ reload_shaders(BeamformerCtx *ctx, Arena a) glDeleteShader(shader_id); } + ctx->export_ctx.volume_texture_id = glGetUniformLocation(csctx->programs[CS_HERCULES], + "u_out_volume_tex"); + csctx->volume_export_pass_id = glGetUniformLocation(csctx->programs[CS_HERCULES], + "u_volume_export_pass"); + csctx->out_data_tex_id = glGetUniformLocation(csctx->programs[CS_UFORCES], "u_out_data_tex"); csctx->mip_view_tex_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_view_tex"); csctx->mips_level_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_map"); @@ -168,8 +173,8 @@ main(void) Arena temp_memory = os_alloc_arena((Arena){0}, 8 * MEGABYTE); ctx.window_size = (uv2){.w = 1280, .h = 840}; - ctx.out_data_dim = (uv4){.x = 256, .y = 1024, .z = 1}; + ctx.out_data_dim = (uv4){.x = 1, .y = 1, .z = 1}; ctx.export_ctx.volume_dim = (uv4){.x = 1, .y = 1, .z = 1}; SetConfigFlags(FLAG_VSYNC_HINT); @@ -232,8 +237,6 @@ main(void) reload_shaders(&ctx, temp_memory); ctx.flags &= ~DO_COMPUTE; - ctx.flags |= ALLOC_SSBOS|ALLOC_OUT_TEX; - while(!WindowShouldClose()) { do_debug(); if (ctx.gl_vendor_id == 
GL_VENDOR_NVIDIA) diff --git a/os_unix.c b/os_unix.c @@ -61,6 +61,17 @@ os_read_file(Arena *a, char *fname, size fsize) return ret; } +static b32 +os_write_file(char *fname, s8 raw) +{ + i32 fd = open(fname, O_WRONLY|O_TRUNC|O_CREAT, 0600); + if (fd < 0) + return 0; + size wlen = write(fd, raw.data, raw.len); + close(fd); + return wlen == raw.len; +} + static os_file_stats os_get_file_stats(char *fname) { diff --git a/os_win32.c b/os_win32.c @@ -71,6 +71,24 @@ os_read_file(Arena *a, char *fname, size fsize) return ret; } +static b32 +os_write_file(char *fname, s8 raw) +{ + if (raw.size > (size)U32_MAX) { + fputs("os_write_file: writing files > 4GB is not yet support on win32\n", stderr); + return 0; + } + + HANDLE h = CreateFileA(fname, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0); + if (h == INVALID_HANDLE_VALUE) + return 0; + + DWORD wlen = 0; + WriteFile(h, raw.data, raw.len, &wlen, 0); + CloseHandle(h); + return wlen == raw.len; +} + static os_file_stats os_get_file_stats(char *fname) { diff --git a/shaders/2d_hercules.glsl b/shaders/2d_hercules.glsl @@ -28,6 +28,9 @@ layout(std140, binding = 0) uniform parameters { }; layout(rg32f, location = 1) uniform writeonly image3D u_out_data_tex; +layout(r32f, location = 2) uniform writeonly image3D u_out_volume_tex; + +layout(location = 3) uniform int u_volume_export_pass; #define C_SPLINE 0.5 @@ -67,7 +70,9 @@ void main() { vec3 voxel = vec3(gl_GlobalInvocationID.xyz); ivec3 out_coord = ivec3(gl_GlobalInvocationID.xyz); - ivec3 out_data_dim = imageSize(u_out_data_tex); + ivec3 out_data_dim; + if (u_volume_export_pass == 0) out_data_dim = imageSize(u_out_data_tex); + else out_data_dim = imageSize(u_out_volume_tex); /* NOTE: Convert pixel to physical coordinates */ vec2 xdc_size = abs(xdc_max_xy - xdc_min_xy); @@ -101,7 +106,7 @@ void main() vec2 sum = vec2(0); vec3 rdist = starting_dist; - int direction = 1; + int direction = 1 * (u_volume_export_pass ^ 1); uint ridx = 0; /* NOTE: For Each Acquistion in Raw Data */ for 
(uint i = 0; i < dec_data_dim.z; i++) { @@ -129,9 +134,10 @@ void main() ridx += dec_data_dim.x; } - rdist[direction] = starting_dist[direction]; - rdist[(~direction) & 1] -= delta[(~direction) & 1]; + rdist[direction] = starting_dist[direction]; + rdist[direction ^ 1] -= delta[direction ^ 1]; } float val = length(sum); - imageStore(u_out_data_tex, out_coord, vec4(val, val, 0, 0)); + if (u_volume_export_pass == 0) imageStore(u_out_data_tex, out_coord, vec4(val)); + else imageStore(u_out_volume_tex, out_coord, vec4(val)); } diff --git a/ui.c b/ui.c @@ -526,8 +526,10 @@ draw_settings_ui(BeamformerCtx *ctx, Arena arena, Rect r, v2 mouse) btn_r.size.h = ctx->font.baseSize * 1.3; btn_r.size.w *= 0.6; if (do_text_button(ctx, s8("Dump Raw Volume"), btn_r, mouse, hover_t + idx++)) { - if (!ctx->export_ctx.state) - ctx->export_ctx.state = ES_START; + if (!ctx->export_ctx.state) { + ctx->export_ctx.state = ES_START; + ctx->flags |= DO_COMPUTE; + } } /* NOTE: if C compilers didn't suck this would be a static assert */