ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: 3b3aec3f666b5eecc311a99b5f712bfa8431412e
Parent: 5d3f02b4e90c61bf644a4a13b9f4037ba08a7298
Author: Randy Palamar
Date:   Wed, 24 Jul 2024 12:47:28 -0600

send raw rf data size and decoded size separately

These are two separate sizes and they are both needed to do things correctly.

Diffstat:
Mbeamformer.c | 34++++++++++++++++++----------------
Mbeamformer.h | 3++-
Mbeamformer_parameters.h | 6+++---
Mhelpers/ogl_beamformer_lib.c | 8++++----
Mhelpers/ogl_beamformer_lib.h | 3++-
Mshaders/hadamard.glsl | 14+++++++-------
Mshaders/lpf.glsl | 8++++----
Mshaders/uforces.glsl | 16++++++++--------
8 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -51,10 +51,12 @@ static void alloc_shader_storage(BeamformerCtx *ctx, Arena a) { BeamformerParameters *bp = &ctx->params->raw; - uv4 rf_data_dim = bp->rf_data_dim; - size rf_raw_size = bp->channel_data_stride * rf_data_dim.y * rf_data_dim.z * sizeof(i16); - size rf_decoded_size = rf_data_dim.x * rf_data_dim.y * rf_data_dim.z * sizeof(f32) * 2; - ctx->csctx.rf_data_dim = rf_data_dim; + uv4 dec_data_dim = bp->dec_data_dim; + uv2 rf_raw_dim = bp->rf_raw_dim; + size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16); + size rf_decoded_size = dec_data_dim.x * dec_data_dim.y * dec_data_dim.z * sizeof(f32) * 2; + ctx->csctx.rf_raw_dim = rf_raw_dim; + ctx->csctx.dec_data_dim = dec_data_dim; glDeleteBuffers(ARRAY_COUNT(ctx->csctx.rf_data_ssbos), ctx->csctx.rf_data_ssbos); glDeleteBuffers(1, &ctx->csctx.raw_data_ssbo); @@ -70,10 +72,10 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a) } /* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */ - ctx->csctx.hadamard_dim = (uv2){.x = rf_data_dim.z, .y = rf_data_dim.z}; - size hadamard_elements = rf_data_dim.z * rf_data_dim.z; + ctx->csctx.hadamard_dim = (uv2){.x = dec_data_dim.z, .y = dec_data_dim.z}; + size hadamard_elements = dec_data_dim.z * dec_data_dim.z; i32 *hadamard = alloc(&a, i32, hadamard_elements); - fill_hadamard(hadamard, rf_data_dim.z); + fill_hadamard(hadamard, dec_data_dim.z); rlUnloadShaderBuffer(ctx->csctx.hadamard_ssbo); ctx->csctx.hadamard_ssbo = rlLoadShaderBuffer(hadamard_elements * sizeof(i32), hadamard, @@ -96,9 +98,9 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->hadamard_ssbo); - glDispatchCompute(ORONE(csctx->rf_data_dim.x / 32), - ORONE(csctx->rf_data_dim.y / 32), - ORONE(csctx->rf_data_dim.z)); + glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32), + ORONE(csctx->dec_data_dim.y / 32), + ORONE(csctx->dec_data_dim.z)); csctx->last_active_ssbo_index = !csctx->last_active_ssbo_index; break; case CS_LPF: @@ -108,9 +110,9 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->lpf_ssbo); glUniform1i(csctx->lpf_order_id, csctx->lpf_order); #endif - glDispatchCompute(ORONE(csctx->rf_data_dim.x / 32), - ORONE(csctx->rf_data_dim.y / 32), - ORONE(csctx->rf_data_dim.z)); + glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32), + ORONE(csctx->dec_data_dim.y / 32), + ORONE(csctx->dec_data_dim.z)); csctx->last_active_ssbo_index = !csctx->last_active_ssbo_index; break; case CS_MIN_MAX: @@ -405,7 +407,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) BeamformerParameters *bp = &ctx->params->raw; /* NOTE: Check for and Load RF Data into GPU */ if (os_poll_pipe(ctx->data_pipe)) { - if (!uv4_equal(ctx->csctx.rf_data_dim, bp->rf_data_dim) || ctx->flags & ALLOC_SSBOS) + if (!uv4_equal(ctx->csctx.dec_data_dim, bp->dec_data_dim) || ctx->flags & ALLOC_SSBOS) alloc_shader_storage(ctx, arena); if (!uv4_equal(ctx->out_data_dim, bp->output_points) || ctx->flags & ALLOC_OUT_TEX) alloc_output_image(ctx); @@ -413,8 +415,8 @@ do_beamformer(BeamformerCtx *ctx, Arena arena) glBindBuffer(GL_SHADER_STORAGE_BUFFER, ctx->csctx.raw_data_ssbo); void *rf_data_buf = glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_WRITE_ONLY); ASSERT(rf_data_buf); - uv4 rf_data_dim = ctx->csctx.rf_data_dim; - size rf_raw_size = rf_data_dim.x * rf_data_dim.y * rf_data_dim.z * sizeof(i16); + uv2 rf_raw_dim = ctx->csctx.rf_raw_dim; + size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16); size rlen = os_read_pipe_data(ctx->data_pipe, rf_data_buf, rf_raw_size); glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); diff --git a/beamformer.h b/beamformer.h @@ -64,7 +64,8 @@ typedef struct { u32 shared_ubo; - uv4 rf_data_dim; + uv4 dec_data_dim; + uv2 rf_raw_dim; i32 out_data_tex_id; i32 mip_view_tex_id; i32 mips_level_id; diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -6,18 +6,18 @@ typedef struct { u32 channel_mapping[256]; /* Transducer Channel to Verasonics Channel */ u32 uforces_channels[128]; /* Channels used for virtual UFORCES elements */ f32 lpf_coefficients[64]; /* Low Pass Filter Cofficients */ - uv4 rf_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ + uv4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ uv4 output_points; /* Width * Height * Depth; last element ignored */ + uv2 rf_raw_dim; /* Raw Data Dimensions */ v2 output_min_xz; /* [m] Top left corner of output region */ v2 output_max_xz; /* [m] Bottom right corner of output region */ v2 xdc_min_xy; /* [m] Min center of transducer elements */ v2 xdc_max_xy; /* [m] Max center of transducer elements */ - u32 channel_data_stride; /* Data points between channels (samples * acq + padding) */ u32 channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ u32 lpf_order; /* Order of Low Pass Filter */ f32 speed_of_sound; /* [m/s] */ f32 sampling_frequency; /* [Hz] */ f32 center_frequency; /* [Hz] */ f32 focal_depth; /* [m] */ - f32 _pad[2]; + f32 _pad[3]; } BeamformerParameters; diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c @@ -124,7 +124,7 @@ check_shared_memory(char *name) } void -send_data(char *pipe_name, char *shm_name, i16 *data, uv4 data_dim) +send_data(char *pipe_name, char *shm_name, i16 *data, uv2 data_dim) { if (g_pipe.file == OS_INVALID_FILE) { g_pipe = os_open_named_pipe(pipe_name); @@ -136,9 +136,9 @@ send_data(char *pipe_name, char *shm_name, i16 *data, uv4 data_dim) check_shared_memory(shm_name); /* TODO: this probably needs a mutex around it if we want to change it here */ - g_bp->raw.rf_data_dim = data_dim; - size data_size = data_dim.x * data_dim.y * data_dim.z * sizeof(i16); - size written = os_write_to_pipe(g_pipe, data, data_size); + g_bp->raw.rf_raw_dim = data_dim; + size data_size = data_dim.x * data_dim.y * sizeof(i16); + size written = os_write_to_pipe(g_pipe, data, data_size); if (written != data_size) mexWarnMsgIdAndTxt("ogl_beamformer:write_error", "failed to write full data to pipe: wrote: %ld", written); diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h @@ -20,9 +20,10 @@ typedef ptrdiff_t size; #endif typedef struct { f32 x, y; } v2; +typedef struct { u32 x, y; } uv2; typedef struct { u32 x, y, z, w; } uv4; #include "../beamformer_parameters.h" LIB_FN void set_beamformer_parameters(char *shm_name, BeamformerParameters *); -LIB_FN void send_data(char *pipe_name, char *shm_name, i16 *data, uv4 data_dim); +LIB_FN void send_data(char *pipe_name, char *shm_name, i16 *data, uv2 data_dim); diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl @@ -18,13 +18,13 @@ layout(std140, binding = 0) uniform parameters { uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ vec4 lpf_coefficients[16]; /* Low Pass Filter Cofficients */ - uvec4 rf_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ + uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uvec2 rf_raw_dim; /* Raw Data Dimensions */ vec2 output_min_xz; /* [m] Top left corner of output region */ vec2 output_max_xz; /* [m] Bottom right corner of output region */ vec2 xdc_min_xy; /* [m] Min center of transducer elements */ vec2 xdc_max_xy; /* [m] Max center of transducer elements */ - uint channel_data_stride; /* Data points between channels (samples * acq + padding) */ uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ uint lpf_order; /* Order of Low Pass Filter */ float speed_of_sound; /* [m/s] */ @@ -45,10 +45,10 @@ void main() uint acq = gl_GlobalInvocationID.z; /* NOTE: offset to get the correct column in hadamard matrix */ - uint hoff = rf_data_dim.z * acq; + uint hoff = dec_data_dim.z * acq; /* NOTE: offsets for storing the results in the output data */ - uint out_off = rf_data_dim.x * rf_data_dim.y * acq + rf_data_dim.x * channel + time_sample; + uint out_off = dec_data_dim.x * dec_data_dim.y * acq + dec_data_dim.x * channel + time_sample; uint ch_base_idx = (channel + channel_offset) / 4; uint ch_sub_idx = (channel + channel_offset) - ch_base_idx * 4; @@ -56,8 +56,8 @@ void main() /* NOTE: stride is the number of samples between acquistions; off is the * index of the first acquisition for this channel and time sample */ - uint rf_stride = rf_data_dim.x; - uint rf_off = channel_data_stride * rf_channel + rf_data_dim.x * acq + time_sample; + uint rf_stride = dec_data_dim.x; + uint rf_off = rf_raw_dim.x * rf_channel + dec_data_dim.x * acq + time_sample; /* NOTE: rf_data index and stride considering the data is i16 not i32 */ uint ridx = rf_off / 2; @@ -71,7 +71,7 @@ void main() /* NOTE: Compute N-D dot product */ int sum = 0; - for (int i = 0; i < rf_data_dim.z; i++) { + for (int i = 0; i < dec_data_dim.z; i++) { int data = (rf_data[ridx] << lfs) >> 16; sum += hadamard[hoff + i] * data; ridx += ridx_delta; diff --git a/shaders/lpf.glsl b/shaders/lpf.glsl @@ -18,13 +18,13 @@ layout(std140, binding = 0) uniform parameters { uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ vec4 lpf_coefficients[16]; /* Low Pass Filter Cofficients */ - uvec4 rf_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ + uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uvec2 rf_raw_dim; /* Raw Data Dimensions */ vec2 output_min_xz; /* [m] Top left corner of output region */ vec2 output_max_xz; /* [m] Bottom right corner of output region */ vec2 xdc_min_xy; /* [m] Min center of transducer elements */ vec2 xdc_max_xy; /* [m] Max center of transducer elements */ - uint channel_data_stride; /* Data points between channels (samples * acq + padding) */ uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ uint lpf_order; /* Order of Low Pass Filter */ float speed_of_sound; /* [m/s] */ @@ -42,8 +42,8 @@ void main() uint acq = gl_GlobalInvocationID.z; /* NOTE: offsets for storing the results in the output data */ - uint stride = rf_data_dim.x * rf_data_dim.y; - uint off = rf_data_dim.x * channel + time_sample; + uint stride = dec_data_dim.x * dec_data_dim.y; + uint off = dec_data_dim.x * channel + time_sample; vec2 sum = vec2(0); for (int i = 0; i <= lpf_order; i++) { diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl @@ -10,13 +10,13 @@ layout(std140, binding = 0) uniform parameters { uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ vec4 lpf_coefficients[16]; /* Low Pass Filter Cofficients */ - uvec4 rf_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ + uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uvec2 rf_raw_dim; /* Raw Data Dimensions */ vec2 output_min_xz; /* [m] Top left corner of output region */ vec2 output_max_xz; /* [m] Bottom right corner of output region */ vec2 xdc_min_xy; /* [m] Min center of transducer elements */ vec2 xdc_max_xy; /* [m] Max center of transducer elements */ - uint channel_data_stride; /* Data points between channels (samples * acq + padding) */ uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ uint lpf_order; /* Order of Low Pass Filter */ float speed_of_sound; /* [m/s] */ @@ -72,16 +72,16 @@ void main() ); float x = image_point.x - xdc_min_xy.x; - float dx = xdc_size.x / float(rf_data_dim.y); + float dx = xdc_size.x / float(dec_data_dim.y); float dzsign = sign(image_point.z - focal_depth); vec2 sum = vec2(0); float time_scale = radians(360) * center_frequency; - uint ridx = rf_data_dim.y * rf_data_dim.x; + uint ridx = dec_data_dim.y * dec_data_dim.x; /* NOTE: skip first acquisition since its garbage */ - for (uint i = 1; i < rf_data_dim.z; i++) { + for (uint i = 1; i < dec_data_dim.z; i++) { uint base_idx = (i - 1) / 4; uint sub_idx = (i - 1) - base_idx * 4; @@ -89,7 +89,7 @@ void main() float transmit_dist = focal_depth + dzsign * distance(image_point, focal_point); vec2 rdist = vec2(x, image_point.z); - for (uint j = 0; j < rf_data_dim.y; j++) { + for (uint j = 0; j < dec_data_dim.y; j++) { float dist = transmit_dist + length(rdist); float time = dist / speed_of_sound; @@ -99,9 +99,9 @@ void main() p.y *= sin(time_scale * time); sum += p; rdist.x -= dx; - ridx += rf_data_dim.x; + ridx += dec_data_dim.x; } - ridx += rf_data_dim.y * rf_data_dim.x; + ridx += dec_data_dim.y * dec_data_dim.x; } float val = length(sum); imageStore(u_out_data_tex, out_coord, vec4(val, val, 0, 0));