ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: 51b580cf67ba027ff9a1b12942460dda838e3a70
Parent: 001e7f48eff42b37b1128fd85161f984276e14cc
Author: Randy Palamar
Date:   Tue, 20 Aug 2024 14:22:59 -0600

move beat frequency multiplication to demod shader

This allows demodulation to be more easily disabled

Diffstat:
Mbeamformer.c | 2+-
Mbeamformer_parameters.h | 8++++----
Mhelpers/ogl_beamformer_lib.c | 2+-
Mmain.c | 4++--
Mshaders/2d_hercules.glsl | 2+-
Ashaders/demod.glsl | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mshaders/hadamard.glsl | 7++-----
Dshaders/lpf.glsl | 54------------------------------------------------------
Mshaders/uforces.glsl | 4++--
Mui.c | 2+-
10 files changed, 73 insertions(+), 71 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -129,7 +129,7 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) csctx->raw_data_fences[csctx->raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index; break; - case CS_LPF: + case CS_DEMOD: glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]); glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32), diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -2,9 +2,9 @@ enum compute_shaders { /* TODO: Probably this should be split up */ CS_CUDA_DECODE_AND_DEMOD = 0, - CS_HADAMARD = 1, - CS_HERCULES = 2, - CS_LPF = 3, + CS_DEMOD = 1, + CS_HADAMARD = 2, + CS_HERCULES = 3, CS_MIN_MAX = 4, CS_UFORCES = 5, CS_LAST @@ -24,7 +24,7 @@ typedef struct { v2 xdc_min_xy; /* [m] Min center of transducer elements */ v2 xdc_max_xy; /* [m] Max center of transducer elements */ u32 channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - u32 lpf_order; /* Order of Low Pass Filter */ + i32 lpf_order; /* Order of Low Pass Filter (-1 if disabled) */ f32 speed_of_sound; /* [m/s] */ f32 sampling_frequency; /* [Hz] */ f32 center_frequency; /* [Hz] */ diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c @@ -141,9 +141,9 @@ set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count) for (i32 i = 0; i < stages_count; i++) { switch (stages[i]) { case CS_CUDA_DECODE_AND_DEMOD: + case CS_DEMOD: case CS_HADAMARD: case CS_HERCULES: - case CS_LPF: case CS_MIN_MAX: case CS_UFORCES: g_bp->compute_stages[i] = stages[i]; diff --git a/main.c b/main.c @@ -6,7 +6,7 @@ static os_library_handle g_cuda_lib_handle; static char *compute_shader_paths[CS_LAST] = { [CS_HADAMARD] = "shaders/hadamard.glsl", [CS_HERCULES] = "shaders/2d_hercules.glsl", - [CS_LPF] = "shaders/lpf.glsl", + [CS_DEMOD] = "shaders/demod.glsl", [CS_MIN_MAX] = "shaders/min_max.glsl", [CS_UFORCES] = "shaders/uforces.glsl", }; @@ -182,7 +182,7 @@ main(void) ctx.params->raw.output_points = ctx.out_data_dim; /* NOTE: default compute shader pipeline */ ctx.params->compute_stages[0] = CS_HADAMARD; - ctx.params->compute_stages[1] = CS_LPF; + ctx.params->compute_stages[1] = CS_DEMOD; ctx.params->compute_stages[2] = CS_UFORCES; ctx.params->compute_stages[3] = CS_MIN_MAX; ctx.params->compute_stages_count = 4; diff --git a/shaders/2d_hercules.glsl b/shaders/2d_hercules.glsl @@ -18,7 +18,7 @@ layout(std140, binding = 0) uniform parameters{ vec2 xdc_min_xy; /* [m] Min center of transducer elements */ vec2 xdc_max_xy; /* [m] Max center of transducer elements */ uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - uint lpf_order; /* Order of Low Pass Filter */ + int lpf_order; /* Order of Low Pass Filter (-1 if disabled) */ float speed_of_sound; /* [m/s] */ float sampling_frequency; /* [Hz] */ float center_frequency; /* [Hz] */ diff --git a/shaders/demod.glsl b/shaders/demod.glsl @@ -0,0 +1,59 @@ +/* See LICENSE for license details. */ +#version 460 core +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +layout(std430, binding = 1) readonly restrict buffer buffer_1 { + vec2 in_data[]; +}; + +layout(std430, binding = 2) writeonly restrict buffer buffer_2 { + vec2 out_data[]; +}; + +layout(std140, binding = 0) uniform parameters { + uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ + uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ + vec4 lpf_coefficients[16]; /* Low Pass Filter Cofficients */ + uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ + uvec4 output_points; /* Width * Height * Depth; last element ignored */ + uvec2 rf_raw_dim; /* Raw Data Dimensions */ + vec2 output_min_xz; /* [m] Top left corner of output region */ + vec2 output_max_xz; /* [m] Bottom right corner of output region */ + vec2 xdc_min_xy; /* [m] Min center of transducer elements */ + vec2 xdc_max_xy; /* [m] Max center of transducer elements */ + uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ + int lpf_order; /* Order of Low Pass Filter (-1 if disabled) */ + float speed_of_sound; /* [m/s] */ + float sampling_frequency; /* [Hz] */ + float center_frequency; /* [Hz] */ + float focal_depth; /* [m] */ + float time_offset; /* pulse length correction time [s] */ + uint uforces; /* mode is UFORCES (1) or FORCES (0) */ + float off_axis_pos; /* Where on the 3rd axis to render the image (Hercules only) */ +}; + +void main() +{ + uint time_sample = gl_GlobalInvocationID.x; + uint channel = gl_GlobalInvocationID.y; + uint acq = gl_GlobalInvocationID.z; + + /* NOTE: offsets for storing the results in the output data */ + uint stride = dec_data_dim.x * dec_data_dim.y; + uint off = dec_data_dim.x * channel + stride * acq + time_sample; + + /* NOTE: for calculating full-band I-Q data; needs to be stepped in loop */ + float arg = radians(360) * center_frequency * time_sample / sampling_frequency; + float arg_delta = radians(360) * center_frequency / sampling_frequency; + + vec2 sum = vec2(0); + for (int i = 0; i <= lpf_order; i++) { + vec2 data; + /* NOTE: make sure data samples come from the same acquisition */ + if (time_sample >= i) data = in_data[off - i].xx * vec2(cos(arg), sin(arg)); + else data = vec2(0); + sum += lpf_coefficients[i / 4][i % 4] * data; + arg -= arg_delta; + } + out_data[off] = sum; +} diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl @@ -26,7 +26,7 @@ layout(std140, binding = 0) uniform parameters { vec2 xdc_min_xy; /* [m] Min center of transducer elements */ vec2 xdc_max_xy; /* [m] Max center of transducer elements */ uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - uint lpf_order; /* Order of Low Pass Filter */ + int lpf_order; /* Order of Low Pass Filter (-1 if disabled) */ float speed_of_sound; /* [m/s] */ float sampling_frequency; /* [Hz] */ float center_frequency; /* [Hz] */ @@ -79,8 +79,5 @@ void main() sum += hadamard[hoff + i] * data; ridx += ridx_delta; } - - float arg = radians(360) * center_frequency * time_sample / sampling_frequency; - out_data[out_off].x = float(sum) * cos(arg); - out_data[out_off].y = float(sum) * sin(arg); + out_data[out_off] = vec2(float(sum), 0); } diff --git a/shaders/lpf.glsl b/shaders/lpf.glsl @@ -1,54 +0,0 @@ -/* See LICENSE for license details. */ -#version 460 core -layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; - -layout(std430, binding = 1) readonly restrict buffer buffer_1 { - vec2 in_data[]; -}; - -layout(std430, binding = 2) writeonly restrict buffer buffer_2 { - vec2 out_data[]; -}; - -layout(std140, binding = 0) uniform parameters { - uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ - uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ - vec4 lpf_coefficients[16]; /* Low Pass Filter Cofficients */ - uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uvec4 output_points; /* Width * Height * Depth; last element ignored */ - uvec2 rf_raw_dim; /* Raw Data Dimensions */ - vec2 output_min_xz; /* [m] Top left corner of output region */ - vec2 output_max_xz; /* [m] Bottom right corner of output region */ - vec2 xdc_min_xy; /* [m] Min center of transducer elements */ - vec2 xdc_max_xy; /* [m] Max center of transducer elements */ - uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - uint lpf_order; /* Order of Low Pass Filter */ - float speed_of_sound; /* [m/s] */ - float sampling_frequency; /* [Hz] */ - float center_frequency; /* [Hz] */ - float focal_depth; /* [m] */ - float time_offset; /* pulse length correction time [s] */ - uint uforces; /* mode is UFORCES (1) or FORCES (0) */ - float off_axis_pos; /* Where on the 3rd axis to render the image (Hercules only) */ -}; - -void main() -{ - uint time_sample = gl_GlobalInvocationID.x; - uint channel = gl_GlobalInvocationID.y; - uint acq = gl_GlobalInvocationID.z; - - /* NOTE: offsets for storing the results in the output data */ - uint stride = dec_data_dim.x * dec_data_dim.y; - uint off = dec_data_dim.x * channel + stride * acq + time_sample; - - vec2 sum = vec2(0); - for (int i = 0; i <= lpf_order; i++) { - vec2 data; - /* NOTE: make sure data samples come from the same acquisition */ - if (time_sample > i) data = in_data[off - i]; - else data = vec2(0); - sum += lpf_coefficients[i / 4][i % 4] * data; - } - out_data[off] = sum; -} diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl @@ -18,7 +18,7 @@ layout(std140, binding = 0) uniform parameters { vec2 xdc_min_xy; /* [m] Min center of transducer elements */ vec2 xdc_max_xy; /* [m] Max center of transducer elements */ uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - uint lpf_order; /* Order of Low Pass Filter */ + int lpf_order; /* Order of Low Pass Filter (-1 if disabled) */ float speed_of_sound; /* [m/s] */ float sampling_frequency; /* [Hz] */ float center_frequency; /* [Hz] */ @@ -90,7 +90,7 @@ void main() float dzsign = sign(image_point.z - focal_depth); /* NOTE: offset correcting for both pulse length and low pass filtering */ - float time_correction = time_offset + (lpf_order + 1)/sampling_frequency; + float time_correction = time_offset + (lpf_order + 1) / sampling_frequency; vec2 sum = vec2(0); /* NOTE: skip first acquisition in uforces since its garbage */ diff --git a/ui.c b/ui.c @@ -346,7 +346,7 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r) [CS_CUDA_DECODE_AND_DEMOD] = s8("CUDA Decoding:"), [CS_HADAMARD] = s8("Decoding:"), [CS_HERCULES] = s8("HERCULES:"), - [CS_LPF] = s8("LPF:"), + [CS_DEMOD] = s8("Demodulation:"), [CS_MIN_MAX] = s8("Min/Max:"), [CS_UFORCES] = s8("UFORCES:"), };