ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: ba51d2eaa3468f41aa24be1cb14753561bbb7673
Parent: dbe78a8bfb4cac017d58a417150ad3d459d7d97b
Author: Randy Palamar
Date:   Wed, 10 Jul 2024 07:39:15 -0600

use UBO for beamformer parameters

Diffstat:
M beamformer.c | 11 ++++++++++-
M beamformer.h | 4 +++-
M beamformer_parameters.h | 5 +++--
M main.c | 6 +++++-
M shaders/hadamard.glsl | 20 +++++++++++++++-----
M shaders/uforces.glsl | 40 ++++++++++++++++++++++++----------------
M util.c | 8 ++++++++
7 files changed, 68 insertions(+), 26 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -50,11 +50,12 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) ComputeShaderCtx *csctx = &ctx->csctx; glUseProgram(csctx->programs[shader]); - glUniform3uiv(csctx->rf_data_dim_id, 1, csctx->rf_data_dim.E); glBindImageTexture(ctx->out_texture_unit, ctx->out_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RG32F); glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, csctx->shared_ubo); + u32 rf_ssbo_idx = 0; u32 decoded_ssbo_idx = 1; switch (shader) { @@ -181,6 +182,14 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) } if (ctx->flags & DO_COMPUTE) { + if (ctx->flags & UPLOAD_UBO) { + glBindBuffer(GL_UNIFORM_BUFFER, ctx->csctx.shared_ubo); + void *ubo = glMapBuffer(GL_UNIFORM_BUFFER, GL_WRITE_ONLY); + mem_copy((s8){.data = (u8 *)ctx->params, .len = sizeof(BeamformerParameters)}, + (s8){.data = (u8 *)ubo, .len = sizeof(BeamformerParameters)}); + glUnmapBuffer(GL_UNIFORM_BUFFER); + ctx->flags &= ~UPLOAD_UBO; + } do_compute_shader(ctx, CS_HADAMARD); do_compute_shader(ctx, CS_UFORCES); do_compute_shader(ctx, CS_MIN_MAX); diff --git a/beamformer.h b/beamformer.h @@ -35,6 +35,7 @@ enum compute_shaders { enum program_flags { RELOAD_SHADERS = 1 << 0, DO_COMPUTE = 1 << 1, + UPLOAD_UBO = 1 << 2, }; typedef struct { @@ -45,8 +46,9 @@ typedef struct { u32 hadamard_ssbo; uv2 hadamard_dim; + u32 shared_ubo; + uv4 rf_data_dim; - i32 rf_data_dim_id; i32 out_data_tex_id; i32 mip_view_tex_id; i32 mips_level_id; diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -9,6 +9,7 @@ typedef struct { uv4 output_points; /* Width * Height * Depth; last element ignored */ u32 channel_data_stride; /* Data points between channels (samples * acq + padding) */ u32 channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - f32 speed_of_sound; - f32 sampling_frequency; + f32 speed_of_sound; /* [m/s] */ + f32 sampling_frequency; /* [Hz] */ + f32 focal_depth; /* 
[m] */ } BeamformerParameters; diff --git a/main.c b/main.c @@ -122,7 +122,6 @@ reload_shaders(BeamformerCtx *ctx, Arena a) glDeleteShader(shader_id); } - csctx->rf_data_dim_id = glGetUniformLocation(csctx->programs[CS_UFORCES], "u_rf_data_dim"); csctx->out_data_tex_id = glGetUniformLocation(csctx->programs[CS_UFORCES], "u_out_data_tex"); csctx->mip_view_tex_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_view_tex"); csctx->mips_level_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_map"); @@ -164,6 +163,11 @@ main(void) ctx.params->output_points = ctx.out_data_dim; + /* NOTE: allocate space for Uniform Buffer Object but don't send anything yet */ + glGenBuffers(1, &ctx.csctx.shared_ubo); + glBindBuffer(GL_UNIFORM_BUFFER, ctx.csctx.shared_ubo); + glBufferData(GL_UNIFORM_BUFFER, sizeof(BeamformerParameters), 0, GL_STATIC_DRAW); + ctx.flags |= RELOAD_SHADERS; while(!WindowShouldClose()) { diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl @@ -14,7 +14,17 @@ layout(std430, binding = 3) readonly restrict buffer buffer_3 { int hadamard[]; }; -layout(location = 2) uniform uvec3 u_rf_data_dim; +layout(std140, binding = 0) uniform parameters { + uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ + uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ + uvec4 rf_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ + uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uint channel_data_stride; /* Data points between channels (samples * acq + padding) */ + uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ + float speed_of_sound; /* [m/s] */ + float sampling_frequency; /* [Hz] */ + float focal_depth; /* [m] */ +}; void main() { @@ -27,7 +37,7 @@ void main() uint acq = gl_GlobalInvocationID.z; /* offset to get the correct column in hadamard matrix */ - uint hoff = u_rf_data_dim.z * acq; + uint hoff = rf_data_dim.z * acq; /* 
TODO: make sure incoming data is organized so that stride is 1 * i.e. each column should be a single time sample for all channels @@ -35,8 +45,8 @@ void main() */ /* offset to get the time sample and row in rf data */ - uint rstride = u_rf_data_dim.x * u_rf_data_dim.y; - uint rfoff = u_rf_data_dim.x * channel + time_sample; + uint rstride = rf_data_dim.x * rf_data_dim.y; + uint rfoff = rf_data_dim.x * channel + time_sample; uint ridx = rfoff / 2; uint ridx_delta = rstride / 2; @@ -49,7 +59,7 @@ void main() /* NOTE: Compute N-D dot product */ int sum = 0; - for (int i = 0; i < u_rf_data_dim.z; i++) { + for (int i = 0; i < rf_data_dim.z; i++) { int data = (rf_data[ridx] << lfs) >> 16; sum += hadamard[hoff + i] * data; ridx += ridx_delta; diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl @@ -6,14 +6,22 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 { float rf_data[]; }; -#define C_SPLINE 0.5 +layout(std140, binding = 0) uniform parameters { + uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ + uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ + uvec4 rf_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ + uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uint channel_data_stride; /* Data points between channels (samples * acq + padding) */ + uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ + float speed_of_sound; /* [m/s] */ + float sampling_frequency; /* [Hz] */ + float focal_depth; /* [m] */ +}; +//layout(location = 6) uniform sampler2D u_element_positions; layout(rg32f, location = 1) uniform image3D u_out_data_tex; -layout(location = 2) uniform uvec3 u_rf_data_dim; -layout(location = 3) uniform float u_sound_speed = 1452; -layout(location = 4) uniform float u_sampling_frequency = 2.0833e7; -layout(location = 5) uniform float u_focal_depth = 0.07; -//layout(location = 6) uniform sampler2D u_element_positions; + 
+#define C_SPLINE 0.5 /* NOTE: See: https://en.wikipedia.org/wiki/Cubic_Hermite_spline */ float cubic(uint ridx, float x) @@ -68,27 +76,27 @@ void main() float sparse_elems[] = {17, 33, 49, 65, 80, 96, 112}; float x = image_point.x - xdc_upper_left.x; - float dx = xdc_size.x / float(u_rf_data_dim.y); - float dzsign = sign(image_point.z - u_focal_depth); + float dx = xdc_size.x / float(rf_data_dim.y); + float dzsign = sign(image_point.z - focal_depth); float sum = 0; /* NOTE: skip first acquisition since its garbage */ - uint ridx = u_rf_data_dim.y * u_rf_data_dim.x; - for (uint i = 1; i < u_rf_data_dim.z; i++) { - vec3 focal_point = vec3(sparse_elems[i - 1] * dx, 0, u_focal_depth); - float transmit_dist = u_focal_depth + dzsign * distance(image_point, focal_point); + uint ridx = rf_data_dim.y * rf_data_dim.x; + for (uint i = 1; i < rf_data_dim.z; i++) { + vec3 focal_point = vec3(sparse_elems[i - 1] * dx, 0, focal_depth); + float transmit_dist = focal_depth + dzsign * distance(image_point, focal_point); vec2 rdist = vec2(x, image_point.z); - for (uint j = 0; j < u_rf_data_dim.y; j++) { + for (uint j = 0; j < rf_data_dim.y; j++) { float dist = transmit_dist + length(rdist); - float rsample = dist * u_sampling_frequency / u_sound_speed; + float rsample = dist * sampling_frequency / speed_of_sound; /* NOTE: do cubic interp between adjacent time samples */ sum += cubic(ridx, rsample); rdist.x -= dx; - ridx += u_rf_data_dim.x; + ridx += rf_data_dim.x; } - ridx += u_rf_data_dim.y * u_rf_data_dim.x; + ridx += rf_data_dim.y * rf_data_dim.x; } imageStore(u_out_data_tex, out_coord, vec4(sum, sum, 0, 0)); } diff --git a/util.c b/util.c @@ -15,6 +15,14 @@ die(char *fmt, ...) exit(1); } +static void +mem_copy(s8 src, s8 dest) +{ + ASSERT(src.len <= dest.len); + for (size i = 0; i < src.len; i++) + dest.data[i] = src.data[i]; +} + static void * mem_clear(u8 *p, u8 c, size len) {