Commit: d5355cf2830265cdfe5ca686f0865a28ebc8a46b
Parent: e73757c36e0d0c8fb3acf3b25be1879c4d6a3260
Author: Randy Palamar
Date: Tue, 9 Sep 2025 09:38:42 -0600
meta/DAS: make interpolate flag a comptime flag
Making the flag a compile-time constant has minimal effect on
performance when interpolation is enabled, but the effect is
significant when it is disabled. For a 2D image this gives a ~10%
performance boost with interpolation disabled (disabling it is
completely reasonable when the data isn't decimated).
Diffstat:
4 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -429,7 +429,6 @@ das_ubo_from_beamformer_parameters(BeamformerDASUBO *du, BeamformerParameters *b
du->acquisition_count = bp->acquisition_count;
du->shader_flags = 0;
- if (bp->interpolate) du->shader_flags |= BeamformerShaderDASFlags_Interpolate;
if (bp->coherency_weighting) du->shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting;
if (bp->transmit_mode == BeamformerRCAOrientation_Columns)
du->shader_flags |= BeamformerShaderDASFlags_TxColumns;
@@ -519,6 +518,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
local_flags |= BeamformerShaderDASFlags_Fast;
if (bp->shader_kind == DASShaderKind_UFORCES || bp->shader_kind == DASShaderKind_UHERCULES)
local_flags |= BeamformerShaderDASFlags_Sparse;
+ if (pb->parameters.interpolate)
+ local_flags |= BeamformerShaderDASFlags_Interpolate;
+
match = beamformer_shader_das_match(das_data_kind, local_flags);
commit = 1;
}break;
diff --git a/beamformer.meta b/beamformer.meta
@@ -35,12 +35,12 @@
{
@Permute(DataKind [Float32 Float32Complex])
{
- @PermuteFlags([Fast Sparse])
+ @PermuteFlags([Fast Sparse Interpolate])
}
@Enumeration(RCAOrientation)
- @Flags([Interpolate CoherencyWeighting RxColumns TxColumns])
+ @Flags([CoherencyWeighting RxColumns TxColumns])
}
@Shader(min_max.glsl) MinMax
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -130,10 +130,18 @@ read_only global i32 *beamformer_shader_match_vectors[] = {
(i32 []){BeamformerDataKind_Float32, 0x01},
(i32 []){BeamformerDataKind_Float32, 0x02},
(i32 []){BeamformerDataKind_Float32, 0x03},
+ (i32 []){BeamformerDataKind_Float32, 0x04},
+ (i32 []){BeamformerDataKind_Float32, 0x05},
+ (i32 []){BeamformerDataKind_Float32, 0x06},
+ (i32 []){BeamformerDataKind_Float32, 0x07},
(i32 []){BeamformerDataKind_Float32Complex, 0x00},
(i32 []){BeamformerDataKind_Float32Complex, 0x01},
(i32 []){BeamformerDataKind_Float32Complex, 0x02},
(i32 []){BeamformerDataKind_Float32Complex, 0x03},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x04},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x05},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x06},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x07},
// MinMax
0,
// Sum
@@ -141,7 +149,7 @@ read_only global i32 *beamformer_shader_match_vectors[] = {
// Render3D
0,
};
-#define beamformer_match_vectors_count (53)
+#define beamformer_match_vectors_count (61)
read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {
{0, 1, 0, 0, 0},
@@ -149,10 +157,10 @@ read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {
{2, 6, 1, 2, 0},
{6, 18, 1, 1, 1},
{18, 42, 2, 2, 1},
- {42, 50, 1, 2, 1},
- {50, 51, 0, 0, 0},
- {51, 52, 0, 0, 0},
- {52, 53, 0, 0, 0},
+ {42, 58, 1, 2, 1},
+ {58, 59, 0, 0, 0},
+ {59, 60, 0, 0, 0},
+ {60, 61, 0, 0, 0},
};
read_only global s8 beamformer_shader_names[] = {
@@ -303,7 +311,7 @@ beamformer_shader_demodulate_match(BeamformerDataKind a, BeamformerSamplingMode
function iz
beamformer_shader_das_match(BeamformerDataKind a, i32 flags)
{
- iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 42, 50, 2);
+ iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 42, 58, 2);
return result;
}
diff --git a/shaders/das.glsl b/shaders/das.glsl
@@ -3,7 +3,8 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 {
vec2 rf_data[];
};
-const bool sparse = (ShaderFlags & ShaderFlags_Sparse) != 0;
+const bool sparse = bool(ShaderFlags & ShaderFlags_Sparse);
+const bool interpolate = bool(ShaderFlags & ShaderFlags_Interpolate);
#if (ShaderFlags & ShaderFlags_Fast)
layout(rg32f, binding = 0) restrict uniform image3D u_out_data_tex;
@@ -59,7 +60,6 @@ vec2 cubic(int base_index, float index)
vec2 sample_rf(int channel, int transmit, float index)
{
- bool interpolate = bool(shader_flags & ShaderFlags_Interpolate);
vec2 result = vec2(index >= 0.0f) * vec2((int(index) + 1 + int(interpolate)) < sample_count);
int base_index = int(channel * sample_count * acquisition_count + transmit * sample_count);
if (interpolate) result *= cubic(base_index, index);