ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: d5355cf2830265cdfe5ca686f0865a28ebc8a46b
Parent: e73757c36e0d0c8fb3acf3b25be1879c4d6a3260
Author: Randy Palamar
Date:   Tue,  9 Sep 2025 09:38:42 -0600

meta/DAS: make interpolate flag a comptime flag

The performance difference is minimal when interpolation is
enabled, but it is significant when interpolation is disabled.
For a 2D image this results in a ~10% performance boost when
interpolation is disabled (disabling it is completely reasonable
when the data isn't decimated).

Diffstat:
M beamformer.c                | 4 +++-
M beamformer.meta             | 4 ++--
M generated/beamformer.meta.c | 20 ++++++++++++++------
M shaders/das.glsl            | 4 ++--
4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -429,7 +429,6 @@ das_ubo_from_beamformer_parameters(BeamformerDASUBO *du, BeamformerParameters *b du->acquisition_count = bp->acquisition_count; du->shader_flags = 0; - if (bp->interpolate) du->shader_flags |= BeamformerShaderDASFlags_Interpolate; if (bp->coherency_weighting) du->shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting; if (bp->transmit_mode == BeamformerRCAOrientation_Columns) du->shader_flags |= BeamformerShaderDASFlags_TxColumns; @@ -519,6 +518,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) local_flags |= BeamformerShaderDASFlags_Fast; if (bp->shader_kind == DASShaderKind_UFORCES || bp->shader_kind == DASShaderKind_UHERCULES) local_flags |= BeamformerShaderDASFlags_Sparse; + if (pb->parameters.interpolate) + local_flags |= BeamformerShaderDASFlags_Interpolate; + match = beamformer_shader_das_match(das_data_kind, local_flags); commit = 1; }break; diff --git a/beamformer.meta b/beamformer.meta @@ -35,12 +35,12 @@ { @Permute(DataKind [Float32 Float32Complex]) { - @PermuteFlags([Fast Sparse]) + @PermuteFlags([Fast Sparse Interpolate]) } @Enumeration(RCAOrientation) - @Flags([Interpolate CoherencyWeighting RxColumns TxColumns]) + @Flags([CoherencyWeighting RxColumns TxColumns]) } @Shader(min_max.glsl) MinMax diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -130,10 +130,18 @@ read_only global i32 *beamformer_shader_match_vectors[] = { (i32 []){BeamformerDataKind_Float32, 0x01}, (i32 []){BeamformerDataKind_Float32, 0x02}, (i32 []){BeamformerDataKind_Float32, 0x03}, + (i32 []){BeamformerDataKind_Float32, 0x04}, + (i32 []){BeamformerDataKind_Float32, 0x05}, + (i32 []){BeamformerDataKind_Float32, 0x06}, + (i32 []){BeamformerDataKind_Float32, 0x07}, (i32 []){BeamformerDataKind_Float32Complex, 0x00}, (i32 []){BeamformerDataKind_Float32Complex, 0x01}, (i32 []){BeamformerDataKind_Float32Complex, 0x02}, (i32 []){BeamformerDataKind_Float32Complex, 0x03}, 
+ (i32 []){BeamformerDataKind_Float32Complex, 0x04}, + (i32 []){BeamformerDataKind_Float32Complex, 0x05}, + (i32 []){BeamformerDataKind_Float32Complex, 0x06}, + (i32 []){BeamformerDataKind_Float32Complex, 0x07}, // MinMax 0, // Sum @@ -141,7 +149,7 @@ read_only global i32 *beamformer_shader_match_vectors[] = { // Render3D 0, }; -#define beamformer_match_vectors_count (53) +#define beamformer_match_vectors_count (61) read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = { {0, 1, 0, 0, 0}, @@ -149,10 +157,10 @@ read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = { {2, 6, 1, 2, 0}, {6, 18, 1, 1, 1}, {18, 42, 2, 2, 1}, - {42, 50, 1, 2, 1}, - {50, 51, 0, 0, 0}, - {51, 52, 0, 0, 0}, - {52, 53, 0, 0, 0}, + {42, 58, 1, 2, 1}, + {58, 59, 0, 0, 0}, + {59, 60, 0, 0, 0}, + {60, 61, 0, 0, 0}, }; read_only global s8 beamformer_shader_names[] = { @@ -303,7 +311,7 @@ beamformer_shader_demodulate_match(BeamformerDataKind a, BeamformerSamplingMode function iz beamformer_shader_das_match(BeamformerDataKind a, i32 flags) { - iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 42, 50, 2); + iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 42, 58, 2); return result; } diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -3,7 +3,8 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 { vec2 rf_data[]; }; -const bool sparse = (ShaderFlags & ShaderFlags_Sparse) != 0; +const bool sparse = bool(ShaderFlags & ShaderFlags_Sparse); +const bool interpolate = bool(ShaderFlags & ShaderFlags_Interpolate); #if (ShaderFlags & ShaderFlags_Fast) layout(rg32f, binding = 0) restrict uniform image3D u_out_data_tex; @@ -59,7 +60,6 @@ vec2 cubic(int base_index, float index) vec2 sample_rf(int channel, int transmit, float index) { - bool interpolate = bool(shader_flags & ShaderFlags_Interpolate); vec2 result = vec2(index >= 0.0f) * vec2((int(index) + 1 + int(interpolate)) < sample_count); int base_index = int(channel * 
sample_count * acquisition_count + transmit * sample_count); if (interpolate) result *= cubic(base_index, index);