ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: ba2fb5d57eecb37a64c2790694cd6d166f35c612
Parent: dbfa80b7ab329fad1db7797dc9f178815e75b3f9
Author: Randy Palamar
Date:   Tue,  1 Oct 2024 16:05:08 -0600

restructure and finish image averaging implementation

Now image averaging should work just fine with multiple arrays. A
bunch of useless uniform calls were dropped as well - images
should use binding points not uniform locations.

Diffstat:
Mbeamformer.c | 114+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Mbeamformer.h | 18++++++++++--------
Mbeamformer_parameters.h | 3++-
Mmain.c | 5+----
Mshaders/demod.glsl | 2+-
Mshaders/hadamard.glsl | 2+-
Mshaders/hercules.glsl | 9++++-----
Mshaders/min_max.glsl | 6+++---
Mshaders/render.glsl | 2+-
Mshaders/sum.glsl | 7++++---
Mshaders/uforces.glsl | 7+++----
11 files changed, 98 insertions(+), 77 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -14,23 +14,39 @@ static void alloc_output_image(BeamformerCtx *ctx) { BeamformerParameters *bp = &ctx->params->raw; + ComputeShaderCtx *cs = &ctx->csctx; + ctx->out_data_dim.x = round_down_power_of_2(ORONE(bp->output_points.x)); ctx->out_data_dim.y = round_down_power_of_2(ORONE(bp->output_points.y)); ctx->out_data_dim.z = round_down_power_of_2(ORONE(bp->output_points.z)); + ctx->out_data_dim.w = CLAMP(bp->output_points.w, 0, ARRAY_COUNT(cs->sum_textures)); bp->output_points = ctx->out_data_dim; /* NOTE: allocate storage for beamformed output data; * this is shared between compute and fragment shaders */ uv4 odim = ctx->out_data_dim; u32 max_dim = MAX(odim.x, MAX(odim.y, odim.z)); - /* TODO: does this actually matter or is 0 fine? */ ctx->out_texture_unit = 0; ctx->out_texture_mips = _tzcnt_u32(max_dim) + 1; + glActiveTexture(GL_TEXTURE0); - glDeleteTextures(ARRAY_COUNT(ctx->out_textures), ctx->out_textures); - glGenTextures(ARRAY_COUNT(ctx->out_textures), ctx->out_textures); - for (u32 i = 0; i < ARRAY_COUNT(ctx->out_textures); i++) { - glBindTexture(GL_TEXTURE_3D, ctx->out_textures[i]); + glDeleteTextures(1, &ctx->out_texture); + glGenTextures(1, &ctx->out_texture); + glBindTexture(GL_TEXTURE_3D, ctx->out_texture); + glTexStorage3D(GL_TEXTURE_3D, ctx->out_texture_mips, GL_RG32F, odim.x, odim.y, odim.z); + + glDeleteTextures(ARRAY_COUNT(cs->sum_textures), cs->sum_textures); + glGenTextures(odim.w, cs->sum_textures); + for (u32 i = 0; i < odim.w; i++) { + glBindTexture(GL_TEXTURE_3D, cs->sum_textures[i]); + glTexStorage3D(GL_TEXTURE_3D, ctx->out_texture_mips, GL_RG32F, odim.x, odim.y, odim.z); + } + + bp->array_count = CLAMP(bp->array_count, 1, ARRAY_COUNT(cs->array_textures)); + glDeleteTextures(ARRAY_COUNT(cs->array_textures), cs->array_textures); + glGenTextures(bp->array_count, cs->array_textures); + for (u32 i = 0; i < bp->array_count; i++) { + glBindTexture(GL_TEXTURE_3D, cs->array_textures[i]); glTexStorage3D(GL_TEXTURE_3D, ctx->out_texture_mips, GL_RG32F, odim.x, odim.y, odim.z); } @@ -146,7 +162,6 @@ do_volume_computation_step(BeamformerCtx *ctx, enum compute_shaders shader) glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_3D, e->volume_texture); glBindImageTexture(0, e->volume_texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R32F); - glUniform1i(cs->out_data_tex_id, 0); glUniform1i(cs->volume_export_pass_id, 1); /* NOTE: We must tile this otherwise GL will kill us for taking too long */ @@ -167,12 +182,25 @@ do_volume_computation_step(BeamformerCtx *ctx, enum compute_shaders shader) } static void +do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, u32 out_texture, uv4 out_data_dim) +{ + glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F); + glUniform1f(cs->sum_prescale_id, 1 / (f32)in_texture_count); + for (u32 i = 0; i < in_texture_count; i++) { + glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F); + glDispatchCompute(ORONE(out_data_dim.x / 32), + ORONE(out_data_dim.y), + ORONE(out_data_dim.z / 32)); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + } +} + +static void do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) { ComputeShaderCtx *csctx = &ctx->csctx; uv2 rf_raw_dim = ctx->params->raw.rf_raw_dim; size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16); - u32 sum_count = ARRAY_COUNT(ctx->out_textures); /* TODO: ctx->params->raw.frame_average_count */ glBeginQuery(GL_TIME_ELAPSED, csctx->timer_ids[csctx->timer_index][shader]); @@ -212,16 +240,10 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index; break; case CS_MIN_MAX: { - u32 texture = ctx->out_textures[ctx->out_texture_index]; - glBindImageTexture(ctx->out_texture_unit, texture, 0, GL_FALSE, 0, - GL_WRITE_ONLY, GL_RG32F); - glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit); + u32 texture = ctx->out_texture; for (u32 i = 1; i < ctx->out_texture_mips; i++) { - u32 otu = ctx->out_texture_unit; - glBindImageTexture(otu + 1, texture, i - 1, GL_FALSE, 0, GL_READ_ONLY, GL_RG32F); - glBindImageTexture(otu + 2, texture, i - 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RG32F); - glUniform1i(csctx->out_data_tex_id, otu + 1); - glUniform1i(csctx->mip_view_tex_id, otu + 2); + glBindImageTexture(0, texture, i - 1, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F); + glBindImageTexture(1, texture, i - 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F); glUniform1i(csctx->mips_level_id, i); u32 width = ctx->out_data_dim.x >> i; @@ -232,7 +254,7 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) } } break; case CS_HERCULES: - case CS_UFORCES: + case CS_UFORCES: { if (ctx->export_ctx.state & ES_START) { /* NOTE: on the first frame of compute make a copy of the rf data */ size rf_size = decoded_data_size(csctx); @@ -242,15 +264,25 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) ctx->export_ctx.rf_data_ssbo, 0, 0, rf_size); } + BeamformerParameters *bp = &ctx->params->raw; + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]); glUniform3iv(csctx->volume_export_dim_offset_id, 1, (i32 []){0, 0, 0}); glUniform1i(csctx->volume_export_pass_id, 0); glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit); - for (u32 i = 0; i < ctx->params->raw.array_count; i++) { - BeamformerParameters *bp = &ctx->params->raw; - u32 tex_idx = (ctx->out_texture_index + i) % ARRAY_COUNT(ctx->out_textures); - u32 texture = ctx->out_textures[tex_idx]; + for (u32 i = 0; i < bp->array_count; i++) { + u32 texture; + if (bp->array_count == 1) { + if (ctx->out_data_dim.w > 0) { + texture = csctx->sum_textures[csctx->sum_texture_index]; + } else { + texture = ctx->out_texture; + } + } else { + texture = csctx->array_textures[i]; + } + m3 xdc_transform = observation_direction_to_xdc_space((v3){.z = 1}, bp->xdc_origin[i].xyz, bp->xdc_corner1[i].xyz, @@ -258,36 +290,27 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) glBindTexture(GL_TEXTURE_3D, texture); glBindImageTexture(ctx->out_texture_unit, texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F); - glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit); glUniform1i(csctx->xdc_index_id, i); glUniformMatrix3fv(csctx->xdc_transform_id, 1, GL_FALSE, xdc_transform.E); glDispatchCompute(ORONE(ctx->out_data_dim.x / 32), ctx->out_data_dim.y, ORONE(ctx->out_data_dim.z / 32)); } - if (ctx->params->raw.array_count == 1) - break; - /* TODO: we can't use this as a ring buffer yet since we need to keep a fixed - * index for output display */ - //ctx->out_texture_index = (ctx->out_texture_index + ctx->params->raw.array_count) - // % ARRAY_COUNT(ctx->out_textures); - sum_count = ctx->params->raw.array_count; - /* NOTE: FALLTHROUGH; sum implicit when beamforming mulitple arrays */ + if (bp->array_count > 1) { + if (ctx->out_data_dim.w > 0) + do_sum_shader(csctx, csctx->array_textures, bp->array_count, + csctx->sum_textures[csctx->sum_texture_index], + ctx->out_data_dim); + else + do_sum_shader(csctx, csctx->array_textures, bp->array_count, + ctx->out_texture, ctx->out_data_dim); + } + } break; case CS_SUM: { - u32 otu = 0; - /* TODO: extra "display" texture to always sum into */ - u32 out_texture = ctx->out_textures[ctx->out_texture_index]; - glBindImageTexture(otu, out_texture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_RG32F); - glUniform1i(csctx->sum_out_img_id, otu); - for (u32 i = ctx->out_texture_index + 1; i < sum_count; i++) { - u32 tex_idx = i % ARRAY_COUNT(ctx->out_textures); - u32 in_texture = ctx->out_textures[tex_idx]; - glBindImageTexture(otu + 1, in_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG32F); - glUniform1i(csctx->sum_in_img_id, otu + 1); - glDispatchCompute(ORONE(ctx->out_data_dim.x / 32), - ctx->out_data_dim.y, - ORONE(ctx->out_data_dim.z / 32)); - glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + u32 frame_count = ctx->out_data_dim.w; + if (frame_count) { + do_sum_shader(csctx, csctx->sum_textures, frame_count, ctx->out_texture, ctx->out_data_dim); + csctx->sum_texture_index = (csctx->sum_texture_index + 1) % frame_count; } } break; default: ASSERT(0); @@ -444,8 +467,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) FragmentShaderCtx *fs = &ctx->fsctx; glUseProgram(fs->shader.id); glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit); - /* TODO: seperate display texture */ - glBindTexture(GL_TEXTURE_3D, ctx->out_textures[ctx->out_texture_index]); + glBindTexture(GL_TEXTURE_3D, ctx->out_texture); glUniform1i(fs->out_data_tex_id, ctx->out_texture_unit); glUniform1f(fs->db_cutoff_id, fs->db); DrawTexture(fs->output.texture, 0, 0, WHITE); diff --git a/beamformer.h b/beamformer.h @@ -164,6 +164,14 @@ typedef struct { GLsync timer_fences[MAX_FRAMES_IN_FLIGHT]; f32 last_frame_time[CS_LAST]; + /* NOTE: circular buffer of textures for averaging. + * Only allocated up to configured frame average count */ + u32 sum_textures[16]; + u32 sum_texture_index; + + /* NOTE: array output textures. Only allocated up to configured array count */ + u32 array_textures[4]; + /* NOTE: the raw_data_ssbo is allocated at 3x the required size to allow for tiled * transfers when the GPU is running behind the CPU. It is not mapped on NVIDIA because * their drivers _will_ store the buffer in the system memory. This doesn't happen @@ -187,16 +195,13 @@ typedef struct { uv4 dec_data_dim; uv2 rf_raw_dim; - i32 out_data_tex_id; - i32 mip_view_tex_id; i32 mips_level_id; i32 volume_export_pass_id; i32 volume_export_dim_offset_id; i32 xdc_transform_id; i32 xdc_index_id; - i32 sum_out_img_id; - i32 sum_in_img_id; + i32 sum_prescale_id; } ComputeShaderCtx; typedef struct { @@ -238,10 +243,7 @@ typedef struct { InputState is; uv4 out_data_dim; - /* NOTE: circular buffer of output textures; useful for multi array or averaging */ - /* TODO: support averaging for multi array imaging */ - u32 out_textures[4]; - u32 out_texture_index; + u32 out_texture; u32 out_texture_unit; u32 out_texture_mips; diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -21,7 +21,7 @@ typedef struct { v4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */ v4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */ uv4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uv4 output_points; /* Width * Height * Depth; last element ignored */ + uv4 output_points; /* Width * Height * Depth * (Frame Average Count) */ v4 output_min_coordinate; /* [m] Back-Top-Left corner of output region (w ignored) */ v4 output_max_coordinate; /* [m] Front-Bottom-Right corner of output region (w ignored)*/ uv2 rf_raw_dim; /* Raw Data Dimensions */ @@ -36,4 +36,5 @@ typedef struct { u32 uforces; /* mode is UFORCES (1) or FORCES (0) */ f32 off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */ i32 beamform_plane; /* Plane to Beamform in 2D HERCULES */ + f32 _pad[3]; } BeamformerParameters; diff --git a/main.c b/main.c @@ -127,12 +127,9 @@ reload_shaders(BeamformerCtx *ctx, Arena a) csctx->xdc_index_id = glGetUniformLocation(csctx->programs[CS_UFORCES], "u_xdc_index"); - csctx->out_data_tex_id = glGetUniformLocation(csctx->programs[CS_UFORCES], "u_out_data_tex"); - csctx->mip_view_tex_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_view_tex"); csctx->mips_level_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_map"); - csctx->sum_out_img_id = glGetUniformLocation(csctx->programs[CS_SUM], "u_sum_out_img"); - csctx->sum_in_img_id = glGetUniformLocation(csctx->programs[CS_SUM], "u_sum_in_img"); + csctx->sum_prescale_id = glGetUniformLocation(csctx->programs[CS_SUM], "u_prescale"); Shader updated_fs = LoadShader(NULL, "shaders/render.glsl"); if (updated_fs.id != rlGetShaderIdDefault()) { diff --git a/shaders/demod.glsl b/shaders/demod.glsl @@ -18,7 +18,7 @@ layout(std140, binding = 0) uniform parameters { vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */ vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */ uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */ vec4 output_min_coord; /* [m] Top left corner of output region */ vec4 output_max_coord; /* [m] Bottom right corner of output region */ uvec2 rf_raw_dim; /* Raw Data Dimensions */ diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl @@ -22,7 +22,7 @@ layout(std140, binding = 0) uniform parameters { vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */ vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */ uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */ vec4 output_min_coord; /* [m] Top left corner of output region */ vec4 output_max_coord; /* [m] Bottom right corner of output region */ uvec2 rf_raw_dim; /* Raw Data Dimensions */ diff --git a/shaders/hercules.glsl b/shaders/hercules.glsl @@ -14,7 +14,7 @@ layout(std140, binding = 0) uniform parameters { vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */ vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */ uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */ vec4 output_min_coord; /* [m] Top left corner of output region */ vec4 output_max_coord; /* [m] Bottom right corner of output region */ uvec2 rf_raw_dim; /* Raw Data Dimensions */ @@ -31,7 +31,7 @@ layout(std140, binding = 0) uniform parameters { int beamform_plane; /* Plane to Beamform in 2D HERCULES */ }; -layout(rg32f, location = 1) uniform writeonly image3D u_out_data_tex; +layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex; layout(location = 2) uniform int u_volume_export_pass; layout(location = 3) uniform ivec3 u_volume_export_dim_offset; @@ -94,7 +94,7 @@ void main() vec3 voxel = vec3(gl_GlobalInvocationID.xyz) + vec3(u_volume_export_dim_offset); ivec3 out_coord = ivec3(gl_GlobalInvocationID.xyz) + u_volume_export_dim_offset; - /* NOTE: Convert pixel to physical coordinates */ + /* NOTE: Convert voxel to physical coordinates */ vec3 edge1 = xdc_corner1[u_xdc_index].xyz - xdc_origin[u_xdc_index].xyz; vec3 edge2 = xdc_corner2[u_xdc_index].xyz - xdc_origin[u_xdc_index].xyz; vec3 image_point = calc_image_point(voxel); @@ -167,6 +167,5 @@ void main() rdist[direction] = starting_point[direction]; rdist[direction ^ 1] -= delta[direction ^ 1]; } - float val = length(sum); - imageStore(u_out_data_tex, out_coord, vec4(val)); + imageStore(u_out_data_tex, out_coord, vec4(length(sum))); } diff --git a/shaders/min_max.glsl b/shaders/min_max.glsl @@ -5,9 +5,9 @@ #version 460 core layout(local_size_x = 32, local_size_y = 1, local_size_z = 32) in; -layout(rg32f, location = 1) uniform readonly image3D u_out_data_tex; -layout(rg32f, location = 2) uniform writeonly image3D u_mip_view_tex; -layout(location = 3) uniform int u_mip_map = 0; +layout(rg32f, binding = 0) readonly uniform image3D u_out_data_tex; +layout(rg32f, binding = 1) writeonly uniform image3D u_mip_view_tex; +layout(location = 1) uniform int u_mip_map = 0; void main() { diff --git a/shaders/render.glsl b/shaders/render.glsl @@ -28,7 +28,7 @@ void main() float smp = length(texelFetch(u_out_data_tex, smp_coord, 0).xy); float absmax = max(abs(min_max.y), abs(min_max.x)); - smp = 20 * log(abs(smp) / absmax) / log(10); + smp = 20 * log(smp / absmax) / log(10); smp = clamp(smp, u_db_cutoff, 0) / u_db_cutoff; smp = 1 - smp; diff --git a/shaders/sum.glsl b/shaders/sum.glsl @@ -2,12 +2,13 @@ layout(local_size_x = 32, local_size_y = 1, local_size_z = 32) in; -layout(rg32f, location = 1) uniform image3D u_out_img; -layout(rg32f, location = 2) readonly uniform image3D u_in_img; +layout(rg32f, binding = 0) uniform image3D u_out_img; +layout(rg32f, binding = 1) readonly uniform image3D u_in_img; +layout(location = 1) uniform float u_prescale = 1.0; void main() { ivec3 voxel = ivec3(gl_GlobalInvocationID); - vec4 sum = imageLoad(u_out_img, voxel) + imageLoad(u_in_img, voxel); + vec4 sum = imageLoad(u_out_img, voxel) + u_prescale * imageLoad(u_in_img, voxel); imageStore(u_out_img, voxel, sum); } diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl @@ -14,7 +14,7 @@ layout(std140, binding = 0) uniform parameters { vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */ vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */ uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */ vec4 output_min_coord; /* [m] Top left corner of output region */ vec4 output_max_coord; /* [m] Bottom right corner of output region */ uvec2 rf_raw_dim; /* Raw Data Dimensions */ @@ -31,7 +31,7 @@ layout(std140, binding = 0) uniform parameters { int beamform_plane; /* Plane to Beamform in 2D HERCULES */ }; -layout(rg32f, location = 1) writeonly uniform image3D u_out_data_tex; +layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex; layout(location = 2) uniform int u_volume_export_pass; layout(location = 3) uniform ivec3 u_volume_export_dim_offset; @@ -146,6 +146,5 @@ void main() ridx += dec_data_dim.x; } } - float val = length(sum); - imageStore(u_out_data_tex, out_coord, vec4(val, val, 0, 0)); + imageStore(u_out_data_tex, out_coord, vec4(length(sum))); }