ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: c181b332e633070145251210905a88a4fa920492
Parent: b6d2eb9dcb2c12841a2dc60374e11a7d97ac42fd
Author: Randy Palamar
Date:   Thu, 25 Sep 2025 09:27:07 -0600

shaders/filter: switch to parameter baking at compile time

This gives a ~8% performance boost.

Diffstat:
M beamformer.c | 125 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M beamformer.h | 15 +++++----------
M beamformer.meta | 28 +++++++++++++++++-----------
M build.c | 31 +++++++++++++++++--------------
M generated/beamformer.meta.c | 109 +++++++++++++++++++++++++++++++------------------------------------------------
M shaders/filter.glsl | 48 +++++++++++++++++++++++++-----------------------
6 files changed, 172 insertions(+), 184 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -67,7 +67,7 @@ beamformer_compute_plan_release(BeamformerComputeContext *cc, u32 block) glDeleteBuffers(countof(cp->ubos), cp->ubos); glDeleteTextures(countof(cp->textures), cp->textures); for (u32 i = 0; i < countof(cp->filters); i++) - glDeleteTextures(1, &cp->filters[i].texture); + glDeleteBuffers(1, &cp->filters[i].ssbo); cc->compute_plans[block] = 0; SLLPushFreelist(cp, cc->compute_plan_freelist); } @@ -165,11 +165,10 @@ beamformer_filter_update(BeamformerFilter *f, BeamformerFilterKind kind, f->kind = kind; f->parameters = fp; - glDeleteTextures(1, &f->texture); - glCreateTextures(GL_TEXTURE_1D, 1, &f->texture); - glTextureStorage1D(f->texture, 1, fp.complex? GL_RG32F : GL_R32F, f->length); - glTextureSubImage1D(f->texture, 0, 0, f->length, fp.complex? GL_RG : GL_RED, GL_FLOAT, filter); - glObjectLabel(GL_TEXTURE, f->texture, (i32)label.len, (c8 *)label.data); + glDeleteBuffers(1, &f->ssbo); + glCreateBuffers(1, &f->ssbo); + glNamedBufferStorage(f->ssbo, f->length * (i32)sizeof(f32) * (fp.complex? 
2 : 1), filter, 0); + glObjectLabel(GL_BUFFER, f->ssbo, (i32)label.len, (c8 *)label.data); } function ComputeFrameIterator @@ -516,31 +515,37 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) commit = 1; }break; case BeamformerShaderKind_Demodulate:{ + BeamformerShaderFilterBakeParameters *b = &cp->demodulate_bake; BeamformerFilter *f = cp->filters + sp->filter_slot; - i32 local_flags = BeamformerShaderFilterFlags_Demodulate; - if (f->parameters.complex) local_flags |= BeamformerShaderFilterFlags_ComplexFilter; - if (!decode_first) local_flags |= BeamformerShaderFilterFlags_MapChannels; - BeamformerDataKind filter_data_kind = data_kind; - if (decode_first) - filter_data_kind = BeamformerDataKind_Float32; + bp->time_offset += f->time_delay; - match = beamformer_shader_demodulate_match(filter_data_kind, pb->parameters.sampling_mode, local_flags); + b->filter_length = (u32)f->length; + b->sampling_mode = pb->parameters.sampling_mode; + b->shader_flags = BeamformerShaderFilterFlags_Demodulate; + if (f->parameters.complex) b->shader_flags |= BeamformerShaderFilterFlags_ComplexFilter; + if (!decode_first) b->shader_flags |= BeamformerShaderFilterFlags_MapChannels; + + b->data_kind = data_kind; + if (decode_first) + b->data_kind = BeamformerDataKind_Float32; - bp->time_offset += f->time_delay; commit = 1; }break; case BeamformerShaderKind_Filter:{ + BeamformerShaderFilterBakeParameters *b = &cp->filter_bake; BeamformerFilter *f = cp->filters + sp->filter_slot; - i32 local_flags = 0; - if (f->parameters.complex) local_flags |= BeamformerShaderFilterFlags_ComplexFilter; - BeamformerDataKind filter_data_kind = data_kind; + bp->time_offset += f->time_delay; + + b->filter_length = (u32)f->length; + b->shader_flags = 0; + if (f->parameters.complex) b->shader_flags |= BeamformerShaderFilterFlags_ComplexFilter; + + b->data_kind = data_kind; if (decode_first) - filter_data_kind = BeamformerDataKind_Float32; + b->data_kind = 
BeamformerDataKind_Float32; - match = beamformer_shader_filter_match(filter_data_kind, local_flags); - bp->time_offset += f->time_delay; commit = 1; }break; case BeamformerShaderKind_DAS:{ @@ -611,31 +616,32 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) * IQ[n] = I[n] - j*Q[n] */ if (demodulate) { + BeamformerShaderFilterBakeParameters *b = &cp->demodulate_bake; BeamformerFilterUBO *mp = &cp->demod_ubo_data; mp->demodulation_frequency = bp->demodulation_frequency; mp->sampling_frequency = bp->sampling_frequency / 2; - mp->decimation_rate = decimation_rate; + b->decimation_rate = decimation_rate; - bp->sampling_frequency /= 2 * (f32)mp->decimation_rate; - cp->das_bake.sample_count /= 2 * mp->decimation_rate; + bp->sampling_frequency /= 2 * (f32)b->decimation_rate; + cp->das_bake.sample_count /= 2 * b->decimation_rate; if (decode_first) { - mp->input_channel_stride = dp->output_channel_stride; - mp->input_sample_stride = dp->output_sample_stride; - mp->input_transmit_stride = dp->output_transmit_stride; + b->input_channel_stride = dp->output_channel_stride; + b->input_sample_stride = dp->output_sample_stride; + b->input_transmit_stride = dp->output_transmit_stride; - mp->output_channel_stride = das_channel_stride; - mp->output_sample_stride = das_sample_stride; - mp->output_transmit_stride = das_transmit_stride; + b->output_channel_stride = das_channel_stride; + b->output_sample_stride = das_sample_stride; + b->output_transmit_stride = das_transmit_stride; } else { - mp->input_channel_stride = input_channel_stride / 2; - mp->input_sample_stride = input_sample_stride; - mp->input_transmit_stride = input_transmit_stride / 2; + b->input_channel_stride = input_channel_stride / 2; + b->input_sample_stride = input_sample_stride; + b->input_transmit_stride = input_transmit_stride / 2; /* NOTE(rnp): output optimized layout for decoding */ - mp->output_channel_stride = dp->input_channel_stride; - mp->output_sample_stride = 
dp->input_sample_stride; - mp->output_transmit_stride = dp->input_transmit_stride; + b->output_channel_stride = dp->input_channel_stride; + b->output_sample_stride = dp->input_sample_stride; + b->output_transmit_stride = dp->input_transmit_stride; cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / DECODE_LOCAL_SIZE_X); } @@ -651,16 +657,17 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) else cp->rf_size *= 4; /* TODO(rnp): UBO per filter stage */ + BeamformerShaderFilterBakeParameters *fltb = &cp->filter_bake; BeamformerFilterUBO *flt = &cp->filter_ubo_data; - flt->demodulation_frequency = bp->demodulation_frequency; - flt->sampling_frequency = bp->sampling_frequency; - flt->decimation_rate = 1; - flt->output_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count; - flt->output_sample_stride = 1; - flt->output_transmit_stride = cp->das_bake.sample_count; - flt->input_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count; - flt->input_sample_stride = 1; - flt->input_transmit_stride = cp->das_bake.sample_count; + flt->demodulation_frequency = bp->demodulation_frequency; + flt->sampling_frequency = bp->sampling_frequency; + fltb->decimation_rate = 1; + fltb->output_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count; + fltb->output_sample_stride = 1; + fltb->output_transmit_stride = cp->das_bake.sample_count; + fltb->input_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count; + fltb->input_sample_stride = 1; + fltb->input_transmit_stride = cp->das_bake.sample_count; } function void @@ -731,20 +738,20 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl #undef X }; - BeamformerShaderKind shader = cp->pipeline.shaders[shader_slot]; - BeamformerShaderDescriptor *sd = beamformer_shader_descriptors + shader; + BeamformerShaderKind shader = cp->pipeline.shaders[shader_slot]; u32 program = 0; i32 
reloadable_index = beamformer_shader_reloadable_index_by_shader[shader]; if (reloadable_index != -1) { BeamformerShaderKind base_shader = beamformer_reloadable_shader_kinds[reloadable_index]; + BeamformerShaderDescriptor *sd = beamformer_shader_descriptors + base_shader; s8 path = push_s8_from_parts(&arena, ctx->os.path_separator, s8("shaders"), beamformer_reloadable_shader_files[reloadable_index]); Stream shader_stream = arena_stream(arena); stream_push_shader_header(&shader_stream, base_shader, compute_headers[base_shader]); - i32 *header_indices = beamformer_shader_header_vectors[sd - beamformer_shader_descriptors]; + i32 *header_indices = beamformer_shader_header_vectors[reloadable_index]; for (i32 index = 0; index < sd->header_vector_length; index++) stream_append_s8(&shader_stream, beamformer_shader_global_header_strings[header_indices[index]]); @@ -768,8 +775,10 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl i32 count = beamformer_shader_bake_parameter_name_counts[reloadable_index]; u32 *parameters = 0; /* TODO(rnp): generate this */ - switch (base_shader) { - case BeamformerShaderKind_DAS:{ parameters = cp->das_bake.E; }break; + switch (shader) { + case BeamformerShaderKind_Demodulate:{ parameters = cp->demodulate_bake.E; }break; + case BeamformerShaderKind_Filter:{ parameters = cp->filter_bake.E; }break; + case BeamformerShaderKind_DAS:{ parameters = cp->das_bake.E; }break; default:{}break; } @@ -883,9 +892,6 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame { BeamformerComputeContext *cc = &ctx->compute_context; - i32 *match_vector = beamformer_shader_match_vectors[cp->shader_matches[shader_slot]]; - BeamformerShaderDescriptor *shader_descriptor = beamformer_shader_descriptors + shader; - u32 program = cp->programs[shader_slot]; glUseProgram(program); @@ -897,7 +903,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame glBindBufferBase(GL_UNIFORM_BUFFER, 
0, cp->ubos[BeamformerComputeUBOKind_Decode]); glBindImageTexture(0, cp->textures[BeamformerComputeTextureKind_Hadamard], 0, 0, 0, GL_READ_ONLY, GL_R8I); - if (shader == cp->pipeline.shaders[0]) { + if (shader_slot == 0) { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[input_ssbo_idx]); glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_ChannelMapping], 0, 0, 0, GL_READ_ONLY, GL_R16I); glProgramUniform1ui(program, DECODE_FIRST_PASS_UNIFORM_LOC, 1); @@ -927,21 +933,20 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame case BeamformerShaderKind_Filter: case BeamformerShaderKind_Demodulate: { - i32 local_flags = match_vector[shader_descriptor->match_vector_length]; - b32 map_channels = (local_flags & BeamformerShaderFilterFlags_MapChannels) != 0; + BeamformerShaderFilterBakeParameters *b = &cp->filter_bake; + if (shader == BeamformerShaderKind_Demodulate) b = &cp->demodulate_bake; + + b32 map_channels = (b->shader_flags & BeamformerShaderFilterFlags_MapChannels) != 0; u32 index = shader == BeamformerShaderKind_Filter ? BeamformerComputeUBOKind_Filter : BeamformerComputeUBOKind_Demodulate; glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[index]); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[output_ssbo_idx]); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cp->filters[sp->filter_slot].ssbo); if (!map_channels) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx]); - - GLenum kind = cp->filters[sp->filter_slot].parameters.complex? 
GL_RG32F : GL_R32F; - glBindImageTexture(0, cp->filters[sp->filter_slot].texture, 0, 0, 0, GL_READ_ONLY, kind); - - if (map_channels) + else glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_ChannelMapping], 0, 0, 0, GL_READ_ONLY, GL_R16I); glDispatchCompute(cp->demod_dispatch.x, cp->demod_dispatch.y, cp->demod_dispatch.z); diff --git a/beamformer.h b/beamformer.h @@ -104,18 +104,11 @@ typedef struct { BeamformerFilterParameters parameters; f32 time_delay; i32 length; - u32 texture; + u32 ssbo; } BeamformerFilter; /* X(name, type, gltype) */ #define BEAMFORMER_FILTER_UBO_PARAM_LIST \ - X(input_channel_stride, u32, uint) \ - X(input_sample_stride, u32, uint) \ - X(input_transmit_stride, u32, uint) \ - X(output_channel_stride, u32, uint) \ - X(output_sample_stride, u32, uint) \ - X(output_transmit_stride, u32, uint) \ - X(decimation_rate, u32, uint) \ X(demodulation_frequency, f32, float) \ X(sampling_frequency, f32, float) @@ -152,7 +145,7 @@ typedef alignas(16) struct { #define X(name, type, ...) 
type name; BEAMFORMER_FILTER_UBO_PARAM_LIST #undef X - float _pad[3]; + float _pad[2]; } BeamformerFilterUBO; static_assert((sizeof(BeamformerFilterUBO) & 15) == 0, "UBO size must be a multiple of 16"); @@ -224,7 +217,9 @@ struct BeamformerComputePlan { BEAMFORMER_COMPUTE_UBO_LIST #undef X - BeamformerShaderDASBakeParameters das_bake; + BeamformerShaderFilterBakeParameters demodulate_bake; + BeamformerShaderFilterBakeParameters filter_bake; + BeamformerShaderDASBakeParameters das_bake; BeamformerComputePlan *next; }; diff --git a/beamformer.meta b/beamformer.meta @@ -1,6 +1,7 @@ @Enumeration(DataKind [Int16 Int16Complex Float32 Float32Complex]) @Enumeration(DecodeMode [None Hadamard]) @Enumeration(RCAOrientation [Rows Columns]) +@Enumeration(SamplingMode [2X 4X]) @ShaderGroup Compute { @@ -19,21 +20,26 @@ @Shader(filter.glsl) Filter { - @Permute(DataKind [Int16Complex Float32 Float32Complex]) + @Enumeration(DataKind) + @Enumeration(SamplingMode) + @Flags([ComplexFilter MapChannels]) + + @Bake { - @PermuteFlags([MapChannels ComplexFilter]) + @BakeVariable(DataKind data_kind ) + @BakeVariable(DecimationRate decimation_rate ) + @BakeVariable(FilterLength filter_length ) + @BakeVariable(InputChannelStride input_channel_stride ) + @BakeVariable(InputSampleStride input_sample_stride ) + @BakeVariable(InputTransmitStride input_transmit_stride ) + @BakeVariable(OutputChannelStride output_channel_stride ) + @BakeVariable(OutputSampleStride output_sample_stride ) + @BakeVariable(OutputTransmitStride output_transmit_stride) + @BakeVariable(ShaderFlags shader_flags ) + @BakeVariable(SamplingMode sampling_mode ) } @SubShader Demodulate - { - @Permute(DataKind [Int16 Float32]) - { - @Permute(SamplingMode [2X 4X]) - { - @PermuteFlags([MapChannels ComplexFilter]) - } - } - } } @Shader(das.glsl) DAS diff --git a/build.c b/build.c @@ -2200,22 +2200,25 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) meta_push_shader_reload_info(m, ctx); meta_begin_scope(m, s8("read_only global 
i32 *beamformer_shader_header_vectors[] = {")); - for (iz shader = 0; shader < ctx->shaders.count; shader++) { - MetaShader *s = ctx->shaders.data + shader; + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + MetaBaseShader *bs = ctx->base_shaders.data + shader; + MetaShader *s = bs->shader; - if (s->global_flag_ids.count || s->global_enumeration_ids.count) { - meta_begin_line(m, s8("(i32 []){")); - for (iz id = 0; id < s->global_flag_ids.count; id++) { - if (id != 0) meta_push(m, s8(", ")); - meta_push_u64(m, s->global_flag_ids.data[id]); - } - for (iz id = 0; id < s->global_enumeration_ids.count; id++) { - if (id != 0 || s->global_flag_ids.count) meta_push(m, s8(", ")); - meta_push_u64(m, s->global_enumeration_ids.data[id]); + if (bs->file.len) { + if (s->global_flag_ids.count || s->global_enumeration_ids.count) { + meta_begin_line(m, s8("(i32 []){")); + for (iz id = 0; id < s->global_flag_ids.count; id++) { + if (id != 0) meta_push(m, s8(", ")); + meta_push_u64(m, s->global_flag_ids.data[id]); + } + for (iz id = 0; id < s->global_enumeration_ids.count; id++) { + if (id != 0 || s->global_flag_ids.count) meta_push(m, s8(", ")); + meta_push_u64(m, s->global_enumeration_ids.data[id]); + } + meta_end_line(m, s8("},")); + } else { + meta_push_line(m, s8("0,")); } - meta_end_line(m, s8("},")); - } else { - meta_push_line(m, s8("0,")); } } meta_end_scope(m, s8("};\n")); diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -33,8 +33,8 @@ typedef enum { } BeamformerShaderDecodeFlags; typedef enum { - BeamformerShaderFilterFlags_MapChannels = (1 << 0), - BeamformerShaderFilterFlags_ComplexFilter = (1 << 1), + BeamformerShaderFilterFlags_ComplexFilter = (1 << 0), + BeamformerShaderFilterFlags_MapChannels = (1 << 1), BeamformerShaderFilterFlags_Demodulate = (1 << 2), } BeamformerShaderFilterFlags; @@ -75,6 +75,23 @@ typedef struct { typedef union { struct { + u32 data_kind; + u32 decimation_rate; + u32 filter_length; + u32 
input_channel_stride; + u32 input_sample_stride; + u32 input_transmit_stride; + u32 output_channel_stride; + u32 output_sample_stride; + u32 output_transmit_stride; + u32 shader_flags; + u32 sampling_mode; + }; + u32 E[11]; +} BeamformerShaderFilterBakeParameters; + +typedef union { + struct { u32 acquisition_count; u32 channel_count; u32 data_kind; @@ -97,43 +114,8 @@ read_only global i32 *beamformer_shader_match_vectors[] = { (i32 []){BeamformerDataKind_Float32, 0x00}, (i32 []){BeamformerDataKind_Float32Complex, 0x00}, // Filter - (i32 []){BeamformerDataKind_Int16Complex, 0x00}, - (i32 []){BeamformerDataKind_Int16Complex, 0x01}, - (i32 []){BeamformerDataKind_Int16Complex, 0x02}, - (i32 []){BeamformerDataKind_Int16Complex, 0x03}, - (i32 []){BeamformerDataKind_Float32, 0x00}, - (i32 []){BeamformerDataKind_Float32, 0x01}, - (i32 []){BeamformerDataKind_Float32, 0x02}, - (i32 []){BeamformerDataKind_Float32, 0x03}, - (i32 []){BeamformerDataKind_Float32Complex, 0x00}, - (i32 []){BeamformerDataKind_Float32Complex, 0x01}, - (i32 []){BeamformerDataKind_Float32Complex, 0x02}, - (i32 []){BeamformerDataKind_Float32Complex, 0x03}, + 0, // Demodulate - (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x04}, - (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x05}, - (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x06}, - (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x07}, - (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x04}, - (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x05}, - (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x06}, - (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x07}, - (i32 []){BeamformerDataKind_Int16, -1, 0x04}, - (i32 []){BeamformerDataKind_Int16, -1, 0x05}, - (i32 []){BeamformerDataKind_Int16, -1, 0x06}, - (i32 []){BeamformerDataKind_Int16, -1, 0x07}, - (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x04}, - (i32 
[]){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x05}, - (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x06}, - (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x07}, - (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x04}, - (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x05}, - (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x06}, - (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x07}, - (i32 []){BeamformerDataKind_Float32, -1, 0x04}, - (i32 []){BeamformerDataKind_Float32, -1, 0x05}, - (i32 []){BeamformerDataKind_Float32, -1, 0x06}, - (i32 []){BeamformerDataKind_Float32, -1, 0x07}, // DAS 0, // MinMax @@ -143,18 +125,18 @@ read_only global i32 *beamformer_shader_match_vectors[] = { // Render3D 0, }; -#define beamformer_match_vectors_count (47) +#define beamformer_match_vectors_count (12) read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = { {0, 1, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 7, 1, 2, 1}, - {7, 19, 1, 1, 1}, - {19, 43, 2, 2, 1}, - {43, 44, 0, 2, 0}, - {44, 45, 0, 0, 0}, - {45, 46, 0, 0, 0}, - {46, 47, 0, 0, 0}, + {7, 8, 0, 2, 0}, + {8, 8, 0, 0, 0}, + {8, 9, 0, 2, 0}, + {9, 10, 0, 0, 0}, + {10, 11, 0, 0, 0}, + {11, 12, 0, 0, 0}, }; read_only global s8 beamformer_shader_names[] = { @@ -237,8 +219,8 @@ read_only global s8 beamformer_shader_local_header_strings[] = { "#define ShaderFlags_DilateOutput (1 << 0)\n" "\n"), s8_comp("" - "#define ShaderFlags_MapChannels (1 << 0)\n" - "#define ShaderFlags_ComplexFilter (1 << 1)\n" + "#define ShaderFlags_ComplexFilter (1 << 0)\n" + "#define ShaderFlags_MapChannels (1 << 1)\n" "#define ShaderFlags_Demodulate (1 << 2)\n" "\n"), s8_comp("" @@ -260,10 +242,7 @@ read_only global s8 beamformer_shader_descriptor_header_strings[] = { }; read_only global i32 *beamformer_shader_header_vectors[] = { - 0, - 0, (i32 []){0, 1}, - (i32 []){0}, (i32 []){0, 3}, (i32 []){0, 2}, 0, @@ -273,7 +252,19 @@ 
read_only global i32 *beamformer_shader_header_vectors[] = { read_only global s8 *beamformer_shader_bake_parameter_names[] = { 0, - 0, + (s8 []){ + s8_comp("DataKind"), + s8_comp("DecimationRate"), + s8_comp("FilterLength"), + s8_comp("InputChannelStride"), + s8_comp("InputSampleStride"), + s8_comp("InputTransmitStride"), + s8_comp("OutputChannelStride"), + s8_comp("OutputSampleStride"), + s8_comp("OutputTransmitStride"), + s8_comp("ShaderFlags"), + s8_comp("SamplingMode"), + }, (s8 []){ s8_comp("AcquisitionCount"), s8_comp("ChannelCount"), @@ -289,7 +280,7 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { read_only global i32 beamformer_shader_bake_parameter_name_counts[] = { 0, - 0, + 11, 6, 0, 0, @@ -325,17 +316,3 @@ beamformer_shader_decode_match(BeamformerDataKind a, i32 flags) return result; } -function iz -beamformer_shader_filter_match(BeamformerDataKind a, i32 flags) -{ - iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 7, 19, 2); - return result; -} - -function iz -beamformer_shader_demodulate_match(BeamformerDataKind a, BeamformerSamplingMode b, i32 flags) -{ - iz result = beamformer_shader_match((i32 []){(i32)a, (i32)b, flags}, 19, 43, 3); - return result; -} - diff --git a/shaders/filter.glsl b/shaders/filter.glsl @@ -9,6 +9,14 @@ #define SAMPLE_TYPE_CAST(v) unpackSnorm2x16(v) #endif +#if (ShaderFlags & ShaderFlags_ComplexFilter) + #define FILTER_TYPE vec2 + #define apply_filter(iq, h) complex_mul((iq), (h)) +#else + #define FILTER_TYPE float + #define apply_filter(iq, h) ((iq) * (h)) +#endif + layout(std430, binding = 1) readonly restrict buffer buffer_1 { DATA_TYPE in_data[]; }; @@ -17,15 +25,11 @@ layout(std430, binding = 2) writeonly restrict buffer buffer_2 { DATA_TYPE out_data[]; }; -layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping; +layout(std430, binding = 3) readonly restrict buffer buffer_3 { + FILTER_TYPE filter_coefficients[]; +}; -#if (ShaderFlags & ShaderFlags_ComplexFilter) - 
layout(rg32f, binding = 0) readonly restrict uniform image1D filter_coefficients; - #define apply_filter(iq, h) complex_mul((iq), (h).xy) -#else - layout(r32f, binding = 0) readonly restrict uniform image1D filter_coefficients; - #define apply_filter(iq, h) ((iq) * (h).x) -#endif +layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping; const bool map_channels = (ShaderFlags & ShaderFlags_MapChannels) != 0; @@ -46,9 +50,8 @@ vec2 rotate_iq(vec2 iq, int index) // arg = PI * index // cos -> 1 -1 1 -1 // sin -> 0 0 0 0 - /* NOTE(rnp): faster than taking iq or -iq, good job shader compiler */ - if (bool(index & 1)) result = mat2(-1, 0, 0, -1) * iq; - else result = mat2( 1, 0, 0, 1) * iq; + const float scale = bool(index & 1) ? -1 : 1; + result = scale * iq; }break; case SamplingMode_2X:{ // fs = fd @@ -76,37 +79,36 @@ vec2 sample_rf(uint index) void main() { - uint in_sample = gl_GlobalInvocationID.x * decimation_rate; + uint in_sample = gl_GlobalInvocationID.x * DecimationRate; uint out_sample = gl_GlobalInvocationID.x; uint channel = gl_GlobalInvocationID.y; uint transmit = gl_GlobalInvocationID.z; uint in_channel = map_channels ? 
imageLoad(channel_mapping, int(channel)).x : channel; - uint in_offset = input_channel_stride * in_channel + input_transmit_stride * transmit; - uint out_offset = output_channel_stride * channel + - output_transmit_stride * transmit + - output_sample_stride * out_sample; + uint in_offset = InputChannelStride * in_channel + InputTransmitStride * transmit; + uint out_offset = OutputChannelStride * channel + + OutputTransmitStride * transmit + + OutputSampleStride * out_sample; int target; if (map_channels) { - target = int(output_channel_stride / output_sample_stride); + target = OutputChannelStride / OutputSampleStride; } else { - target = int(output_transmit_stride); + target = OutputTransmitStride; } if (out_sample < target) { - target *= int(decimation_rate); + target *= DecimationRate; vec2 result = vec2(0); int a_length = target; - int b_length = imageSize(filter_coefficients).x; int index = int(in_sample); const float scale = bool(ShaderFlags & ShaderFlags_ComplexFilter) ? 1 : sqrt(2); - for (int j = max(0, index - b_length); j < min(index, a_length); j++) { - vec2 iq = sample_rf(in_offset + j); - vec4 h = imageLoad(filter_coefficients, index - j); + for (int j = max(0, index - FilterLength); j < min(index, a_length); j++) { + vec2 iq = sample_rf(in_offset + j); + FILTER_TYPE h = filter_coefficients[index - j]; #if (ShaderFlags & ShaderFlags_Demodulate) result += scale * apply_filter(rotate_iq(iq * vec2(1, -1), -j), h); #else