Commit: c181b332e633070145251210905a88a4fa920492
Parent: b6d2eb9dcb2c12841a2dc60374e11a7d97ac42fd
Author: Randy Palamar
Date: Thu, 25 Sep 2025 09:27:07 -0600
shaders/filter: switch to parameter baking at compile time
this gives an ~8% performance boost
Diffstat:
6 files changed, 172 insertions(+), 184 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -67,7 +67,7 @@ beamformer_compute_plan_release(BeamformerComputeContext *cc, u32 block)
glDeleteBuffers(countof(cp->ubos), cp->ubos);
glDeleteTextures(countof(cp->textures), cp->textures);
for (u32 i = 0; i < countof(cp->filters); i++)
- glDeleteTextures(1, &cp->filters[i].texture);
+ glDeleteBuffers(1, &cp->filters[i].ssbo);
cc->compute_plans[block] = 0;
SLLPushFreelist(cp, cc->compute_plan_freelist);
}
@@ -165,11 +165,10 @@ beamformer_filter_update(BeamformerFilter *f, BeamformerFilterKind kind,
f->kind = kind;
f->parameters = fp;
- glDeleteTextures(1, &f->texture);
- glCreateTextures(GL_TEXTURE_1D, 1, &f->texture);
- glTextureStorage1D(f->texture, 1, fp.complex? GL_RG32F : GL_R32F, f->length);
- glTextureSubImage1D(f->texture, 0, 0, f->length, fp.complex? GL_RG : GL_RED, GL_FLOAT, filter);
- glObjectLabel(GL_TEXTURE, f->texture, (i32)label.len, (c8 *)label.data);
+ glDeleteBuffers(1, &f->ssbo);
+ glCreateBuffers(1, &f->ssbo);
+ glNamedBufferStorage(f->ssbo, f->length * (i32)sizeof(f32) * (fp.complex? 2 : 1), filter, 0);
+ glObjectLabel(GL_BUFFER, f->ssbo, (i32)label.len, (c8 *)label.data);
}
function ComputeFrameIterator
@@ -516,31 +515,37 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
commit = 1;
}break;
case BeamformerShaderKind_Demodulate:{
+ BeamformerShaderFilterBakeParameters *b = &cp->demodulate_bake;
BeamformerFilter *f = cp->filters + sp->filter_slot;
- i32 local_flags = BeamformerShaderFilterFlags_Demodulate;
- if (f->parameters.complex) local_flags |= BeamformerShaderFilterFlags_ComplexFilter;
- if (!decode_first) local_flags |= BeamformerShaderFilterFlags_MapChannels;
- BeamformerDataKind filter_data_kind = data_kind;
- if (decode_first)
- filter_data_kind = BeamformerDataKind_Float32;
+ bp->time_offset += f->time_delay;
- match = beamformer_shader_demodulate_match(filter_data_kind, pb->parameters.sampling_mode, local_flags);
+ b->filter_length = (u32)f->length;
+ b->sampling_mode = pb->parameters.sampling_mode;
+ b->shader_flags = BeamformerShaderFilterFlags_Demodulate;
+ if (f->parameters.complex) b->shader_flags |= BeamformerShaderFilterFlags_ComplexFilter;
+ if (!decode_first) b->shader_flags |= BeamformerShaderFilterFlags_MapChannels;
+
+ b->data_kind = data_kind;
+ if (decode_first)
+ b->data_kind = BeamformerDataKind_Float32;
- bp->time_offset += f->time_delay;
commit = 1;
}break;
case BeamformerShaderKind_Filter:{
+ BeamformerShaderFilterBakeParameters *b = &cp->filter_bake;
BeamformerFilter *f = cp->filters + sp->filter_slot;
- i32 local_flags = 0;
- if (f->parameters.complex) local_flags |= BeamformerShaderFilterFlags_ComplexFilter;
- BeamformerDataKind filter_data_kind = data_kind;
+ bp->time_offset += f->time_delay;
+
+ b->filter_length = (u32)f->length;
+ b->shader_flags = 0;
+ if (f->parameters.complex) b->shader_flags |= BeamformerShaderFilterFlags_ComplexFilter;
+
+ b->data_kind = data_kind;
if (decode_first)
- filter_data_kind = BeamformerDataKind_Float32;
+ b->data_kind = BeamformerDataKind_Float32;
- match = beamformer_shader_filter_match(filter_data_kind, local_flags);
- bp->time_offset += f->time_delay;
commit = 1;
}break;
case BeamformerShaderKind_DAS:{
@@ -611,31 +616,32 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
* IQ[n] = I[n] - j*Q[n]
*/
if (demodulate) {
+ BeamformerShaderFilterBakeParameters *b = &cp->demodulate_bake;
BeamformerFilterUBO *mp = &cp->demod_ubo_data;
mp->demodulation_frequency = bp->demodulation_frequency;
mp->sampling_frequency = bp->sampling_frequency / 2;
- mp->decimation_rate = decimation_rate;
+ b->decimation_rate = decimation_rate;
- bp->sampling_frequency /= 2 * (f32)mp->decimation_rate;
- cp->das_bake.sample_count /= 2 * mp->decimation_rate;
+ bp->sampling_frequency /= 2 * (f32)b->decimation_rate;
+ cp->das_bake.sample_count /= 2 * b->decimation_rate;
if (decode_first) {
- mp->input_channel_stride = dp->output_channel_stride;
- mp->input_sample_stride = dp->output_sample_stride;
- mp->input_transmit_stride = dp->output_transmit_stride;
+ b->input_channel_stride = dp->output_channel_stride;
+ b->input_sample_stride = dp->output_sample_stride;
+ b->input_transmit_stride = dp->output_transmit_stride;
- mp->output_channel_stride = das_channel_stride;
- mp->output_sample_stride = das_sample_stride;
- mp->output_transmit_stride = das_transmit_stride;
+ b->output_channel_stride = das_channel_stride;
+ b->output_sample_stride = das_sample_stride;
+ b->output_transmit_stride = das_transmit_stride;
} else {
- mp->input_channel_stride = input_channel_stride / 2;
- mp->input_sample_stride = input_sample_stride;
- mp->input_transmit_stride = input_transmit_stride / 2;
+ b->input_channel_stride = input_channel_stride / 2;
+ b->input_sample_stride = input_sample_stride;
+ b->input_transmit_stride = input_transmit_stride / 2;
/* NOTE(rnp): output optimized layout for decoding */
- mp->output_channel_stride = dp->input_channel_stride;
- mp->output_sample_stride = dp->input_sample_stride;
- mp->output_transmit_stride = dp->input_transmit_stride;
+ b->output_channel_stride = dp->input_channel_stride;
+ b->output_sample_stride = dp->input_sample_stride;
+ b->output_transmit_stride = dp->input_transmit_stride;
cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / DECODE_LOCAL_SIZE_X);
}
@@ -651,16 +657,17 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
else cp->rf_size *= 4;
/* TODO(rnp): UBO per filter stage */
+ BeamformerShaderFilterBakeParameters *fltb = &cp->filter_bake;
BeamformerFilterUBO *flt = &cp->filter_ubo_data;
- flt->demodulation_frequency = bp->demodulation_frequency;
- flt->sampling_frequency = bp->sampling_frequency;
- flt->decimation_rate = 1;
- flt->output_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count;
- flt->output_sample_stride = 1;
- flt->output_transmit_stride = cp->das_bake.sample_count;
- flt->input_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count;
- flt->input_sample_stride = 1;
- flt->input_transmit_stride = cp->das_bake.sample_count;
+ flt->demodulation_frequency = bp->demodulation_frequency;
+ flt->sampling_frequency = bp->sampling_frequency;
+ fltb->decimation_rate = 1;
+ fltb->output_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count;
+ fltb->output_sample_stride = 1;
+ fltb->output_transmit_stride = cp->das_bake.sample_count;
+ fltb->input_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count;
+ fltb->input_sample_stride = 1;
+ fltb->input_transmit_stride = cp->das_bake.sample_count;
}
function void
@@ -731,20 +738,20 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl
#undef X
};
- BeamformerShaderKind shader = cp->pipeline.shaders[shader_slot];
- BeamformerShaderDescriptor *sd = beamformer_shader_descriptors + shader;
+ BeamformerShaderKind shader = cp->pipeline.shaders[shader_slot];
u32 program = 0;
i32 reloadable_index = beamformer_shader_reloadable_index_by_shader[shader];
if (reloadable_index != -1) {
BeamformerShaderKind base_shader = beamformer_reloadable_shader_kinds[reloadable_index];
+ BeamformerShaderDescriptor *sd = beamformer_shader_descriptors + base_shader;
s8 path = push_s8_from_parts(&arena, ctx->os.path_separator, s8("shaders"),
beamformer_reloadable_shader_files[reloadable_index]);
Stream shader_stream = arena_stream(arena);
stream_push_shader_header(&shader_stream, base_shader, compute_headers[base_shader]);
- i32 *header_indices = beamformer_shader_header_vectors[sd - beamformer_shader_descriptors];
+ i32 *header_indices = beamformer_shader_header_vectors[reloadable_index];
for (i32 index = 0; index < sd->header_vector_length; index++)
stream_append_s8(&shader_stream, beamformer_shader_global_header_strings[header_indices[index]]);
@@ -768,8 +775,10 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl
i32 count = beamformer_shader_bake_parameter_name_counts[reloadable_index];
u32 *parameters = 0;
/* TODO(rnp): generate this */
- switch (base_shader) {
- case BeamformerShaderKind_DAS:{ parameters = cp->das_bake.E; }break;
+ switch (shader) {
+ case BeamformerShaderKind_Demodulate:{ parameters = cp->demodulate_bake.E; }break;
+ case BeamformerShaderKind_Filter:{ parameters = cp->filter_bake.E; }break;
+ case BeamformerShaderKind_DAS:{ parameters = cp->das_bake.E; }break;
default:{}break;
}
@@ -883,9 +892,6 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
{
BeamformerComputeContext *cc = &ctx->compute_context;
- i32 *match_vector = beamformer_shader_match_vectors[cp->shader_matches[shader_slot]];
- BeamformerShaderDescriptor *shader_descriptor = beamformer_shader_descriptors + shader;
-
u32 program = cp->programs[shader_slot];
glUseProgram(program);
@@ -897,7 +903,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[BeamformerComputeUBOKind_Decode]);
glBindImageTexture(0, cp->textures[BeamformerComputeTextureKind_Hadamard], 0, 0, 0, GL_READ_ONLY, GL_R8I);
- if (shader == cp->pipeline.shaders[0]) {
+ if (shader_slot == 0) {
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[input_ssbo_idx]);
glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_ChannelMapping], 0, 0, 0, GL_READ_ONLY, GL_R16I);
glProgramUniform1ui(program, DECODE_FIRST_PASS_UNIFORM_LOC, 1);
@@ -927,21 +933,20 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
case BeamformerShaderKind_Filter:
case BeamformerShaderKind_Demodulate:
{
- i32 local_flags = match_vector[shader_descriptor->match_vector_length];
- b32 map_channels = (local_flags & BeamformerShaderFilterFlags_MapChannels) != 0;
+ BeamformerShaderFilterBakeParameters *b = &cp->filter_bake;
+ if (shader == BeamformerShaderKind_Demodulate) b = &cp->demodulate_bake;
+
+ b32 map_channels = (b->shader_flags & BeamformerShaderFilterFlags_MapChannels) != 0;
u32 index = shader == BeamformerShaderKind_Filter ? BeamformerComputeUBOKind_Filter
: BeamformerComputeUBOKind_Demodulate;
glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[index]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[output_ssbo_idx]);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cp->filters[sp->filter_slot].ssbo);
if (!map_channels)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx]);
-
- GLenum kind = cp->filters[sp->filter_slot].parameters.complex? GL_RG32F : GL_R32F;
- glBindImageTexture(0, cp->filters[sp->filter_slot].texture, 0, 0, 0, GL_READ_ONLY, kind);
-
- if (map_channels)
+ else
glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_ChannelMapping], 0, 0, 0, GL_READ_ONLY, GL_R16I);
glDispatchCompute(cp->demod_dispatch.x, cp->demod_dispatch.y, cp->demod_dispatch.z);
diff --git a/beamformer.h b/beamformer.h
@@ -104,18 +104,11 @@ typedef struct {
BeamformerFilterParameters parameters;
f32 time_delay;
i32 length;
- u32 texture;
+ u32 ssbo;
} BeamformerFilter;
/* X(name, type, gltype) */
#define BEAMFORMER_FILTER_UBO_PARAM_LIST \
- X(input_channel_stride, u32, uint) \
- X(input_sample_stride, u32, uint) \
- X(input_transmit_stride, u32, uint) \
- X(output_channel_stride, u32, uint) \
- X(output_sample_stride, u32, uint) \
- X(output_transmit_stride, u32, uint) \
- X(decimation_rate, u32, uint) \
X(demodulation_frequency, f32, float) \
X(sampling_frequency, f32, float)
@@ -152,7 +145,7 @@ typedef alignas(16) struct {
#define X(name, type, ...) type name;
BEAMFORMER_FILTER_UBO_PARAM_LIST
#undef X
- float _pad[3];
+ float _pad[2];
} BeamformerFilterUBO;
static_assert((sizeof(BeamformerFilterUBO) & 15) == 0, "UBO size must be a multiple of 16");
@@ -224,7 +217,9 @@ struct BeamformerComputePlan {
BEAMFORMER_COMPUTE_UBO_LIST
#undef X
- BeamformerShaderDASBakeParameters das_bake;
+ BeamformerShaderFilterBakeParameters demodulate_bake;
+ BeamformerShaderFilterBakeParameters filter_bake;
+ BeamformerShaderDASBakeParameters das_bake;
BeamformerComputePlan *next;
};
diff --git a/beamformer.meta b/beamformer.meta
@@ -1,6 +1,7 @@
@Enumeration(DataKind [Int16 Int16Complex Float32 Float32Complex])
@Enumeration(DecodeMode [None Hadamard])
@Enumeration(RCAOrientation [Rows Columns])
+@Enumeration(SamplingMode [2X 4X])
@ShaderGroup Compute
{
@@ -19,21 +20,26 @@
@Shader(filter.glsl) Filter
{
- @Permute(DataKind [Int16Complex Float32 Float32Complex])
+ @Enumeration(DataKind)
+ @Enumeration(SamplingMode)
+ @Flags([ComplexFilter MapChannels])
+
+ @Bake
{
- @PermuteFlags([MapChannels ComplexFilter])
+ @BakeVariable(DataKind data_kind )
+ @BakeVariable(DecimationRate decimation_rate )
+ @BakeVariable(FilterLength filter_length )
+ @BakeVariable(InputChannelStride input_channel_stride )
+ @BakeVariable(InputSampleStride input_sample_stride )
+ @BakeVariable(InputTransmitStride input_transmit_stride )
+ @BakeVariable(OutputChannelStride output_channel_stride )
+ @BakeVariable(OutputSampleStride output_sample_stride )
+ @BakeVariable(OutputTransmitStride output_transmit_stride)
+ @BakeVariable(ShaderFlags shader_flags )
+ @BakeVariable(SamplingMode sampling_mode )
}
@SubShader Demodulate
- {
- @Permute(DataKind [Int16 Float32])
- {
- @Permute(SamplingMode [2X 4X])
- {
- @PermuteFlags([MapChannels ComplexFilter])
- }
- }
- }
}
@Shader(das.glsl) DAS
diff --git a/build.c b/build.c
@@ -2200,22 +2200,25 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena)
meta_push_shader_reload_info(m, ctx);
meta_begin_scope(m, s8("read_only global i32 *beamformer_shader_header_vectors[] = {"));
- for (iz shader = 0; shader < ctx->shaders.count; shader++) {
- MetaShader *s = ctx->shaders.data + shader;
+ for (iz shader = 0; shader < ctx->base_shaders.count; shader++) {
+ MetaBaseShader *bs = ctx->base_shaders.data + shader;
+ MetaShader *s = bs->shader;
- if (s->global_flag_ids.count || s->global_enumeration_ids.count) {
- meta_begin_line(m, s8("(i32 []){"));
- for (iz id = 0; id < s->global_flag_ids.count; id++) {
- if (id != 0) meta_push(m, s8(", "));
- meta_push_u64(m, s->global_flag_ids.data[id]);
- }
- for (iz id = 0; id < s->global_enumeration_ids.count; id++) {
- if (id != 0 || s->global_flag_ids.count) meta_push(m, s8(", "));
- meta_push_u64(m, s->global_enumeration_ids.data[id]);
+ if (bs->file.len) {
+ if (s->global_flag_ids.count || s->global_enumeration_ids.count) {
+ meta_begin_line(m, s8("(i32 []){"));
+ for (iz id = 0; id < s->global_flag_ids.count; id++) {
+ if (id != 0) meta_push(m, s8(", "));
+ meta_push_u64(m, s->global_flag_ids.data[id]);
+ }
+ for (iz id = 0; id < s->global_enumeration_ids.count; id++) {
+ if (id != 0 || s->global_flag_ids.count) meta_push(m, s8(", "));
+ meta_push_u64(m, s->global_enumeration_ids.data[id]);
+ }
+ meta_end_line(m, s8("},"));
+ } else {
+ meta_push_line(m, s8("0,"));
}
- meta_end_line(m, s8("},"));
- } else {
- meta_push_line(m, s8("0,"));
}
}
meta_end_scope(m, s8("};\n"));
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -33,8 +33,8 @@ typedef enum {
} BeamformerShaderDecodeFlags;
typedef enum {
- BeamformerShaderFilterFlags_MapChannels = (1 << 0),
- BeamformerShaderFilterFlags_ComplexFilter = (1 << 1),
+ BeamformerShaderFilterFlags_ComplexFilter = (1 << 0),
+ BeamformerShaderFilterFlags_MapChannels = (1 << 1),
BeamformerShaderFilterFlags_Demodulate = (1 << 2),
} BeamformerShaderFilterFlags;
@@ -75,6 +75,23 @@ typedef struct {
typedef union {
struct {
+ u32 data_kind;
+ u32 decimation_rate;
+ u32 filter_length;
+ u32 input_channel_stride;
+ u32 input_sample_stride;
+ u32 input_transmit_stride;
+ u32 output_channel_stride;
+ u32 output_sample_stride;
+ u32 output_transmit_stride;
+ u32 shader_flags;
+ u32 sampling_mode;
+ };
+ u32 E[11];
+} BeamformerShaderFilterBakeParameters;
+
+typedef union {
+ struct {
u32 acquisition_count;
u32 channel_count;
u32 data_kind;
@@ -97,43 +114,8 @@ read_only global i32 *beamformer_shader_match_vectors[] = {
(i32 []){BeamformerDataKind_Float32, 0x00},
(i32 []){BeamformerDataKind_Float32Complex, 0x00},
// Filter
- (i32 []){BeamformerDataKind_Int16Complex, 0x00},
- (i32 []){BeamformerDataKind_Int16Complex, 0x01},
- (i32 []){BeamformerDataKind_Int16Complex, 0x02},
- (i32 []){BeamformerDataKind_Int16Complex, 0x03},
- (i32 []){BeamformerDataKind_Float32, 0x00},
- (i32 []){BeamformerDataKind_Float32, 0x01},
- (i32 []){BeamformerDataKind_Float32, 0x02},
- (i32 []){BeamformerDataKind_Float32, 0x03},
- (i32 []){BeamformerDataKind_Float32Complex, 0x00},
- (i32 []){BeamformerDataKind_Float32Complex, 0x01},
- (i32 []){BeamformerDataKind_Float32Complex, 0x02},
- (i32 []){BeamformerDataKind_Float32Complex, 0x03},
+ 0,
// Demodulate
- (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x04},
- (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x05},
- (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x06},
- (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x07},
- (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x04},
- (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x05},
- (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x06},
- (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x07},
- (i32 []){BeamformerDataKind_Int16, -1, 0x04},
- (i32 []){BeamformerDataKind_Int16, -1, 0x05},
- (i32 []){BeamformerDataKind_Int16, -1, 0x06},
- (i32 []){BeamformerDataKind_Int16, -1, 0x07},
- (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x04},
- (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x05},
- (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x06},
- (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x07},
- (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x04},
- (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x05},
- (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x06},
- (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x07},
- (i32 []){BeamformerDataKind_Float32, -1, 0x04},
- (i32 []){BeamformerDataKind_Float32, -1, 0x05},
- (i32 []){BeamformerDataKind_Float32, -1, 0x06},
- (i32 []){BeamformerDataKind_Float32, -1, 0x07},
// DAS
0,
// MinMax
@@ -143,18 +125,18 @@ read_only global i32 *beamformer_shader_match_vectors[] = {
// Render3D
0,
};
-#define beamformer_match_vectors_count (47)
+#define beamformer_match_vectors_count (12)
read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {
{0, 1, 0, 0, 0},
{1, 2, 0, 0, 0},
{2, 7, 1, 2, 1},
- {7, 19, 1, 1, 1},
- {19, 43, 2, 2, 1},
- {43, 44, 0, 2, 0},
- {44, 45, 0, 0, 0},
- {45, 46, 0, 0, 0},
- {46, 47, 0, 0, 0},
+ {7, 8, 0, 2, 0},
+ {8, 8, 0, 0, 0},
+ {8, 9, 0, 2, 0},
+ {9, 10, 0, 0, 0},
+ {10, 11, 0, 0, 0},
+ {11, 12, 0, 0, 0},
};
read_only global s8 beamformer_shader_names[] = {
@@ -237,8 +219,8 @@ read_only global s8 beamformer_shader_local_header_strings[] = {
"#define ShaderFlags_DilateOutput (1 << 0)\n"
"\n"),
s8_comp(""
- "#define ShaderFlags_MapChannels (1 << 0)\n"
- "#define ShaderFlags_ComplexFilter (1 << 1)\n"
+ "#define ShaderFlags_ComplexFilter (1 << 0)\n"
+ "#define ShaderFlags_MapChannels (1 << 1)\n"
"#define ShaderFlags_Demodulate (1 << 2)\n"
"\n"),
s8_comp(""
@@ -260,10 +242,7 @@ read_only global s8 beamformer_shader_descriptor_header_strings[] = {
};
read_only global i32 *beamformer_shader_header_vectors[] = {
- 0,
- 0,
(i32 []){0, 1},
- (i32 []){0},
(i32 []){0, 3},
(i32 []){0, 2},
0,
@@ -273,7 +252,19 @@ read_only global i32 *beamformer_shader_header_vectors[] = {
read_only global s8 *beamformer_shader_bake_parameter_names[] = {
0,
- 0,
+ (s8 []){
+ s8_comp("DataKind"),
+ s8_comp("DecimationRate"),
+ s8_comp("FilterLength"),
+ s8_comp("InputChannelStride"),
+ s8_comp("InputSampleStride"),
+ s8_comp("InputTransmitStride"),
+ s8_comp("OutputChannelStride"),
+ s8_comp("OutputSampleStride"),
+ s8_comp("OutputTransmitStride"),
+ s8_comp("ShaderFlags"),
+ s8_comp("SamplingMode"),
+ },
(s8 []){
s8_comp("AcquisitionCount"),
s8_comp("ChannelCount"),
@@ -289,7 +280,7 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = {
read_only global i32 beamformer_shader_bake_parameter_name_counts[] = {
0,
- 0,
+ 11,
6,
0,
0,
@@ -325,17 +316,3 @@ beamformer_shader_decode_match(BeamformerDataKind a, i32 flags)
return result;
}
-function iz
-beamformer_shader_filter_match(BeamformerDataKind a, i32 flags)
-{
- iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 7, 19, 2);
- return result;
-}
-
-function iz
-beamformer_shader_demodulate_match(BeamformerDataKind a, BeamformerSamplingMode b, i32 flags)
-{
- iz result = beamformer_shader_match((i32 []){(i32)a, (i32)b, flags}, 19, 43, 3);
- return result;
-}
-
diff --git a/shaders/filter.glsl b/shaders/filter.glsl
@@ -9,6 +9,14 @@
#define SAMPLE_TYPE_CAST(v) unpackSnorm2x16(v)
#endif
+#if (ShaderFlags & ShaderFlags_ComplexFilter)
+ #define FILTER_TYPE vec2
+ #define apply_filter(iq, h) complex_mul((iq), (h))
+#else
+ #define FILTER_TYPE float
+ #define apply_filter(iq, h) ((iq) * (h))
+#endif
+
layout(std430, binding = 1) readonly restrict buffer buffer_1 {
DATA_TYPE in_data[];
};
@@ -17,15 +25,11 @@ layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
DATA_TYPE out_data[];
};
-layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping;
+layout(std430, binding = 3) readonly restrict buffer buffer_3 {
+ FILTER_TYPE filter_coefficients[];
+};
-#if (ShaderFlags & ShaderFlags_ComplexFilter)
- layout(rg32f, binding = 0) readonly restrict uniform image1D filter_coefficients;
- #define apply_filter(iq, h) complex_mul((iq), (h).xy)
-#else
- layout(r32f, binding = 0) readonly restrict uniform image1D filter_coefficients;
- #define apply_filter(iq, h) ((iq) * (h).x)
-#endif
+layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping;
const bool map_channels = (ShaderFlags & ShaderFlags_MapChannels) != 0;
@@ -46,9 +50,8 @@ vec2 rotate_iq(vec2 iq, int index)
// arg = PI * index
// cos -> 1 -1 1 -1
// sin -> 0 0 0 0
- /* NOTE(rnp): faster than taking iq or -iq, good job shader compiler */
- if (bool(index & 1)) result = mat2(-1, 0, 0, -1) * iq;
- else result = mat2( 1, 0, 0, 1) * iq;
+ const float scale = bool(index & 1) ? -1 : 1;
+ result = scale * iq;
}break;
case SamplingMode_2X:{
// fs = fd
@@ -76,37 +79,36 @@ vec2 sample_rf(uint index)
void main()
{
- uint in_sample = gl_GlobalInvocationID.x * decimation_rate;
+ uint in_sample = gl_GlobalInvocationID.x * DecimationRate;
uint out_sample = gl_GlobalInvocationID.x;
uint channel = gl_GlobalInvocationID.y;
uint transmit = gl_GlobalInvocationID.z;
uint in_channel = map_channels ? imageLoad(channel_mapping, int(channel)).x : channel;
- uint in_offset = input_channel_stride * in_channel + input_transmit_stride * transmit;
- uint out_offset = output_channel_stride * channel +
- output_transmit_stride * transmit +
- output_sample_stride * out_sample;
+ uint in_offset = InputChannelStride * in_channel + InputTransmitStride * transmit;
+ uint out_offset = OutputChannelStride * channel +
+ OutputTransmitStride * transmit +
+ OutputSampleStride * out_sample;
int target;
if (map_channels) {
- target = int(output_channel_stride / output_sample_stride);
+ target = OutputChannelStride / OutputSampleStride;
} else {
- target = int(output_transmit_stride);
+ target = OutputTransmitStride;
}
if (out_sample < target) {
- target *= int(decimation_rate);
+ target *= DecimationRate;
vec2 result = vec2(0);
int a_length = target;
- int b_length = imageSize(filter_coefficients).x;
int index = int(in_sample);
const float scale = bool(ShaderFlags & ShaderFlags_ComplexFilter) ? 1 : sqrt(2);
- for (int j = max(0, index - b_length); j < min(index, a_length); j++) {
- vec2 iq = sample_rf(in_offset + j);
- vec4 h = imageLoad(filter_coefficients, index - j);
+ for (int j = max(0, index - FilterLength); j < min(index, a_length); j++) {
+ vec2 iq = sample_rf(in_offset + j);
+ FILTER_TYPE h = filter_coefficients[index - j];
#if (ShaderFlags & ShaderFlags_Demodulate)
result += scale * apply_filter(rotate_iq(iq * vec2(1, -1), -j), h);
#else