Commit: 3939648169851306c32bf37a52f73ac8567c85fb
Parent: ba9b99fed19c9da46620c15c04639080dae659a6
Author: Randy Palamar
Date: Mon, 3 Nov 2025 06:10:20 -0700
shaders/decode: add a compile-time flag for LDS
Diffstat:
4 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -505,8 +505,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
}
}
- if (run_cuda_hilbert) sd->bake.flags |= BeamformerShaderDecodeFlags_DilateOutput;
-
BeamformerShaderKind *last_shader = cp->pipeline.shaders + slot - 1;
assert(first || ((*last_shader == BeamformerShaderKind_Demodulate ||
*last_shader == BeamformerShaderKind_Filter)));
@@ -527,9 +525,13 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
db->output_transmit_stride *= decimation_rate;
}
+ if (run_cuda_hilbert) sd->bake.flags |= BeamformerShaderDecodeFlags_DilateOutput;
+ if (db->transmit_count > 32) sd->bake.flags |= BeamformerShaderDecodeFlags_UseSharedMemory;
+
db->transmits_processed = db->transmit_count >= 32 ? 2 : 1;
- b32 use_16z = db->transmit_count <= 32 || db->transmit_count == 80 || db->transmit_count == 96 || db->transmit_count == 160;
+ b32 use_16z = db->transmit_count <= 32 || db->transmit_count == 80 ||
+ db->transmit_count == 96 || db->transmit_count == 160;
sd->layout.x = 4;
sd->layout.y = 1;
sd->layout.z = use_16z? 16 : 32;
diff --git a/beamformer.meta b/beamformer.meta
@@ -59,7 +59,7 @@
{
@Enumeration(DataKind)
@Enumeration(DecodeMode)
- @Flags([DilateOutput])
+ @Flags([DilateOutput UseSharedMemory])
@Bake
{
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -53,7 +53,8 @@ typedef enum {
} BeamformerInterpolationMode;
typedef enum {
- BeamformerShaderDecodeFlags_DilateOutput = (1 << 0),
+ BeamformerShaderDecodeFlags_DilateOutput = (1 << 0),
+ BeamformerShaderDecodeFlags_UseSharedMemory = (1 << 1),
} BeamformerShaderDecodeFlags;
typedef enum {
@@ -240,6 +241,7 @@ read_only global s8 beamformer_shader_global_header_strings[] = {
read_only global s8 *beamformer_shader_flag_strings[] = {
(s8 []){
s8_comp("DilateOutput"),
+ s8_comp("UseSharedMemory"),
},
(s8 []){
s8_comp("ComplexFilter"),
@@ -259,7 +261,7 @@ read_only global s8 *beamformer_shader_flag_strings[] = {
};
read_only global u8 beamformer_shader_flag_strings_count[] = {
- 1,
+ 2,
3,
5,
0,
diff --git a/shaders/decode.glsl b/shaders/decode.glsl
@@ -90,7 +90,7 @@ void main()
result[i] = sample_rf_data(rf_offset + transmit + i);
}break;
case DecodeMode_Hadamard:{
- #if TransmitCount > 32
+ #if UseSharedMemory
{
uint thread_count = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;
uint thread_index = gl_LocalInvocationIndex;
@@ -115,7 +115,7 @@ void main()
result[i] = SAMPLE_DATA_TYPE(0);
for (int j = 0; j < TransmitCount; j++) {
- #if TransmitCount > 32
+ #if UseSharedMemory
SAMPLE_DATA_TYPE s = SAMPLE_TYPE_CAST(rf[gl_LocalInvocationID.x * TransmitCount + j]);
#else
SAMPLE_DATA_TYPE s = sample_rf_data(rf_offset + j);