ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 3939648169851306c32bf37a52f73ac8567c85fb
Parent: ba9b99fed19c9da46620c15c04639080dae659a6
Author: Randy Palamar
Date:   Mon,  3 Nov 2025 06:10:20 -0700

shaders/decode: add a compile time flag for LDS

Diffstat:
M beamformer.c | 8 +++++---
M beamformer.meta | 2 +-
M generated/beamformer.meta.c | 6 ++++--
M shaders/decode.glsl | 4 ++--
4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -505,8 +505,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) } } - if (run_cuda_hilbert) sd->bake.flags |= BeamformerShaderDecodeFlags_DilateOutput; - BeamformerShaderKind *last_shader = cp->pipeline.shaders + slot - 1; assert(first || ((*last_shader == BeamformerShaderKind_Demodulate || *last_shader == BeamformerShaderKind_Filter))); @@ -527,9 +525,13 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) db->output_transmit_stride *= decimation_rate; } + if (run_cuda_hilbert) sd->bake.flags |= BeamformerShaderDecodeFlags_DilateOutput; + if (db->transmit_count > 32) sd->bake.flags |= BeamformerShaderDecodeFlags_UseSharedMemory; + db->transmits_processed = db->transmit_count >= 32 ? 2 : 1; - b32 use_16z = db->transmit_count <= 32 || db->transmit_count == 80 || db->transmit_count == 96 || db->transmit_count == 160; + b32 use_16z = db->transmit_count <= 32 || db->transmit_count == 80 || + db->transmit_count == 96 || db->transmit_count == 160; sd->layout.x = 4; sd->layout.y = 1; sd->layout.z = use_16z? 
16 : 32; diff --git a/beamformer.meta b/beamformer.meta @@ -59,7 +59,7 @@ { @Enumeration(DataKind) @Enumeration(DecodeMode) - @Flags([DilateOutput]) + @Flags([DilateOutput UseSharedMemory]) @Bake { diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -53,7 +53,8 @@ typedef enum { } BeamformerInterpolationMode; typedef enum { - BeamformerShaderDecodeFlags_DilateOutput = (1 << 0), + BeamformerShaderDecodeFlags_DilateOutput = (1 << 0), + BeamformerShaderDecodeFlags_UseSharedMemory = (1 << 1), } BeamformerShaderDecodeFlags; typedef enum { @@ -240,6 +241,7 @@ read_only global s8 beamformer_shader_global_header_strings[] = { read_only global s8 *beamformer_shader_flag_strings[] = { (s8 []){ s8_comp("DilateOutput"), + s8_comp("UseSharedMemory"), }, (s8 []){ s8_comp("ComplexFilter"), @@ -259,7 +261,7 @@ read_only global s8 *beamformer_shader_flag_strings[] = { }; read_only global u8 beamformer_shader_flag_strings_count[] = { - 1, + 2, 3, 5, 0, diff --git a/shaders/decode.glsl b/shaders/decode.glsl @@ -90,7 +90,7 @@ void main() result[i] = sample_rf_data(rf_offset + transmit + i); }break; case DecodeMode_Hadamard:{ - #if TransmitCount > 32 + #if UseSharedMemory { uint thread_count = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; uint thread_index = gl_LocalInvocationIndex; @@ -115,7 +115,7 @@ void main() result[i] = SAMPLE_DATA_TYPE(0); for (int j = 0; j < TransmitCount; j++) { - #if TransmitCount > 32 + #if UseSharedMemory SAMPLE_DATA_TYPE s = SAMPLE_TYPE_CAST(rf[gl_LocalInvocationID.x * TransmitCount + j]); #else SAMPLE_DATA_TYPE s = sample_rf_data(rf_offset + j);