Commit: 3e23d5d371f9e3817889c940926f5429d66c7a7a
Parent: 31df2285cfeeb900c97ecd3011a75df20ef05ad9
Author: Randy Palamar
Date: Wed, 5 Nov 2025 21:43:26 -0700
shaders/filter: allow demod to output floats directly
This allows the decode shader to be skipped when processing data sets
which do not require decoding.
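Also fix decimation when the LDS optimization is used: the shared rf[] buffer is now sized by DecimationRate * gl_WorkGroupSize.x + FilterLength - 1.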
Diffstat:
4 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -575,7 +575,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
if (first && data_kind == BeamformerDataKind_Int16)
sd->dispatch.x = (u32)ceil_f32((f32)sd->dispatch.x / 2);
- commit = 1;
+ commit = first || db->decode_mode != BeamformerDecodeMode_None;
}break;
case BeamformerShaderKind_Demodulate:
case BeamformerShaderKind_Filter:
@@ -610,24 +610,28 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
fb->decimation_rate = decimation_rate;
fb->sample_count = pb->parameters.sample_count;
+ fb->output_channel_stride = das_channel_stride;
+ fb->output_sample_stride = das_sample_stride;
+ fb->output_transmit_stride = das_transmit_stride;
+
if (first) {
fb->input_channel_stride = pb->parameters.raw_data_dimensions[0] / 2;
fb->input_sample_stride = 1;
fb->input_transmit_stride = pb->parameters.sample_count / 2;
- /* NOTE(rnp): output optimized layout for decoding */
- fb->output_channel_stride = das_channel_stride;
- fb->output_sample_stride = pb->parameters.acquisition_count;
- fb->output_transmit_stride = 1;
+ if (pb->parameters.decode_mode == BeamformerDecodeMode_None) {
+ sd->bake.flags |= BeamformerShaderFilterFlags_OutputFloats;
+ } else {
+ /* NOTE(rnp): output optimized layout for decoding */
+ fb->output_channel_stride = das_channel_stride;
+ fb->output_sample_stride = pb->parameters.acquisition_count;
+ fb->output_transmit_stride = 1;
+ }
} else {
assert(cp->pipeline.shaders[slot - 1] == BeamformerShaderKind_Decode);
fb->input_channel_stride = ld->bake.Decode.output_channel_stride;
fb->input_sample_stride = ld->bake.Decode.output_sample_stride;
fb->input_transmit_stride = ld->bake.Decode.output_transmit_stride;
-
- fb->output_channel_stride = das_channel_stride;
- fb->output_sample_stride = das_sample_stride;
- fb->output_transmit_stride = das_transmit_stride;
}
} else {
fb->decimation_rate = 1;
diff --git a/beamformer.meta b/beamformer.meta
@@ -78,7 +78,7 @@
@Shader(filter.glsl) Filter
{
@Enumeration(DataKind)
- @Flags([ComplexFilter MapChannels])
+ @Flags([ComplexFilter MapChannels OutputFloats])
@Bake
{
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -60,7 +60,8 @@ typedef enum {
typedef enum {
BeamformerShaderFilterFlags_ComplexFilter = (1 << 0),
BeamformerShaderFilterFlags_MapChannels = (1 << 1),
- BeamformerShaderFilterFlags_Demodulate = (1 << 2),
+ BeamformerShaderFilterFlags_OutputFloats = (1 << 2),
+ BeamformerShaderFilterFlags_Demodulate = (1 << 3),
} BeamformerShaderFilterFlags;
typedef enum {
@@ -246,6 +247,7 @@ read_only global s8 *beamformer_shader_flag_strings[] = {
(s8 []){
s8_comp("ComplexFilter"),
s8_comp("MapChannels"),
+ s8_comp("OutputFloats"),
s8_comp("Demodulate"),
},
(s8 []){
@@ -262,7 +264,7 @@ read_only global s8 *beamformer_shader_flag_strings[] = {
read_only global u8 beamformer_shader_flag_strings_count[] = {
2,
- 3,
+ 4,
5,
0,
0,
diff --git a/shaders/filter.glsl b/shaders/filter.glsl
@@ -7,8 +7,17 @@
#define SAMPLE_TYPE_CAST(v) (v)
#else
#define DATA_TYPE uint
- #define RESULT_TYPE_CAST(v) packSnorm2x16(v)
#define SAMPLE_TYPE_CAST(v) unpackSnorm2x16(v)
+ #if OutputFloats
+ #define OUT_DATA_TYPE vec2
+ #define RESULT_TYPE_CAST(v) (clamp((v), -1.0, 1.0) * 32767.0f)
+ #else
+ #define RESULT_TYPE_CAST(v) packSnorm2x16(v)
+ #endif
+#endif
+
+#ifndef OUT_DATA_TYPE
+ #define OUT_DATA_TYPE DATA_TYPE
#endif
#if ComplexFilter
@@ -24,7 +33,7 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 {
};
layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
- DATA_TYPE out_data[];
+ OUT_DATA_TYPE out_data[];
};
layout(std430, binding = 3) readonly restrict buffer buffer_3 {
@@ -55,7 +64,7 @@ SAMPLE_TYPE sample_rf(uint index)
return result;
}
-shared SAMPLE_TYPE rf[FilterLength + gl_WorkGroupSize.x - 1];
+shared SAMPLE_TYPE rf[DecimationRate * gl_WorkGroupSize.x + FilterLength - 1];
void main()
{