ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 23d41865917638b402f4f30aa039f4d5837eabfe
Parent: 9ef1b3cab882b7f7ee89320fc54843347770d7d9
Author: Randy Palamar
Date:   Thu, 25 Sep 2025 16:46:01 -0600

meta: bake float variables as well

need to be a little careful with these not to lose precision but
otherwise there is no reason why they can't get baked as well.

This gives DAS another ~3% boost

Diffstat:
Mbeamformer.c | 56++++++++++++++++++++++++--------------------------------
Mbeamformer.h | 30+++++-------------------------
Mbeamformer.meta | 61++++++++++++++++++++++++++++++++++---------------------------
Mbuild.c | 43++++++++++++++++++++++++++++++++++---------
Mgenerated/beamformer.meta.c | 31+++++++++++++++++++++++++++----
Mshaders/das.glsl | 14+++++++-------
Mshaders/filter.glsl | 2+-
Mui.c | 3+--
8 files changed, 133 insertions(+), 107 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -448,16 +448,15 @@ das_ubo_from_beamformer_parameters(BeamformerComputePlan *cp, BeamformerDASUBO * du->voxel_transform = das_voxel_transform_matrix(bp); mem_copy(du->xdc_transform.E, bp->xdc_transform, sizeof(du->xdc_transform)); mem_copy(du->xdc_element_pitch.E, bp->xdc_element_pitch, sizeof(du->xdc_element_pitch)); - du->sampling_frequency = bp->sampling_frequency; - du->demodulation_frequency = bp->demodulation_frequency; - du->speed_of_sound = bp->speed_of_sound; - du->time_offset = bp->time_offset; - du->f_number = bp->f_number; - - cp->das_bake.shader_kind = bp->das_shader_id; - cp->das_bake.sample_count = bp->sample_count; - cp->das_bake.channel_count = bp->channel_count; - cp->das_bake.acquisition_count = bp->acquisition_count; + cp->das_bake.sampling_frequency = bp->sampling_frequency; + cp->das_bake.demodulation_frequency = bp->demodulation_frequency; + cp->das_bake.speed_of_sound = bp->speed_of_sound; + cp->das_bake.time_offset = bp->time_offset; + cp->das_bake.f_number = bp->f_number; + cp->das_bake.shader_kind = bp->das_shader_id; + cp->das_bake.sample_count = bp->sample_count; + cp->das_bake.channel_count = bp->channel_count; + cp->das_bake.acquisition_count = bp->acquisition_count; cp->das_bake.shader_flags = 0; if (bp->coherency_weighting) cp->das_bake.shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting; @@ -522,7 +521,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) BeamformerShaderFilterBakeParameters *b = &cp->demodulate_bake; BeamformerFilter *f = cp->filters + sp->filter_slot; - bp->time_offset += f->time_delay; + cp->das_bake.time_offset += f->time_delay; b->filter_length = (u32)f->length; b->sampling_mode = pb->parameters.sampling_mode; @@ -540,7 +539,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) BeamformerShaderFilterBakeParameters *b = &cp->filter_bake; BeamformerFilter *f = cp->filters + sp->filter_slot; - bp->time_offset += f->time_delay; + cp->das_bake.time_offset += f->time_delay; b->filter_length = (u32)f->length; b->shader_flags = 0; @@ -617,13 +616,12 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) */ if (demodulate) { BeamformerShaderFilterBakeParameters *b = &cp->demodulate_bake; - BeamformerFilterUBO *mp = &cp->demod_ubo_data; - mp->demodulation_frequency = bp->demodulation_frequency; - mp->sampling_frequency = bp->sampling_frequency / 2; - b->decimation_rate = decimation_rate; + b->demodulation_frequency = cp->das_bake.demodulation_frequency; + b->sampling_frequency = cp->das_bake.sampling_frequency / 2; + b->decimation_rate = decimation_rate; - bp->sampling_frequency /= 2 * (f32)b->decimation_rate; - cp->das_bake.sample_count /= 2 * b->decimation_rate; + cp->das_bake.sampling_frequency /= 2 * (f32)b->decimation_rate; + cp->das_bake.sample_count /= 2 * b->decimation_rate; if (decode_first) { b->input_channel_stride = dp->output_channel_stride; @@ -658,9 +656,8 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) /* TODO(rnp): UBO per filter stage */ BeamformerShaderFilterBakeParameters *fltb = &cp->filter_bake; - BeamformerFilterUBO *flt = &cp->filter_ubo_data; - flt->demodulation_frequency = bp->demodulation_frequency; - flt->sampling_frequency = bp->sampling_frequency; + fltb->demodulation_frequency = cp->das_bake.demodulation_frequency; + fltb->sampling_frequency = cp->das_bake.sampling_frequency; fltb->decimation_rate = 1; fltb->output_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count; fltb->output_sample_stride = 1; @@ -727,10 +724,6 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl BEAMFORMER_DAS_UBO_PARAM_LIST "};\n\n" ), - [BeamformerShaderKind_Filter] = s8_comp("layout(std140, binding = 0) uniform parameters {\n" - BEAMFORMER_FILTER_UBO_PARAM_LIST - "};\n\n" - ), #undef X }; @@ -753,8 +746,8 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl stream_append_s8(&shader_stream, beamformer_shader_local_header_strings[reloadable_index]); - if (beamformer_shader_bake_parameter_name_counts[reloadable_index]) { - i32 count = beamformer_shader_bake_parameter_name_counts[reloadable_index]; + if (beamformer_shader_bake_parameter_counts[reloadable_index]) { + i32 count = beamformer_shader_bake_parameter_counts[reloadable_index]; u32 *parameters = 0; /* TODO(rnp): generate this */ switch (shader) { @@ -766,9 +759,11 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl } assert(parameters); - s8 *names = beamformer_shader_bake_parameter_names[reloadable_index]; + s8 *names = beamformer_shader_bake_parameter_names[reloadable_index]; + u8 *is_float = beamformer_shader_bake_parameter_is_float[reloadable_index]; for (i32 index = 0; index < count; index++) { - stream_append_s8s(&shader_stream, s8("#define "), names[index], s8(" (0x")); + stream_append_s8s(&shader_stream, s8("#define "), names[index], + is_float[index]? s8(" uintBitsToFloat") : s8(" "), s8("(0x")); stream_append_hex_u64(&shader_stream, parameters[index]); stream_append_s8(&shader_stream, s8(")\n")); } @@ -920,9 +915,6 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame b32 map_channels = (b->shader_flags & BeamformerShaderFilterFlags_MapChannels) != 0; - u32 index = shader == BeamformerShaderKind_Filter ? BeamformerComputeUBOKind_Filter - : BeamformerComputeUBOKind_Demodulate; - glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[index]); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[output_ssbo_idx]); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cp->filters[sp->filter_slot].ssbo); diff --git a/beamformer.h b/beamformer.h @@ -108,42 +108,22 @@ typedef struct { } BeamformerFilter; /* X(name, type, gltype) */ -#define BEAMFORMER_FILTER_UBO_PARAM_LIST \ - X(demodulation_frequency, f32, float) \ - X(sampling_frequency, f32, float) - -/* X(name, type, gltype) */ #define BEAMFORMER_DAS_UBO_PARAM_LIST \ - X(voxel_transform, m4, mat4) \ - X(xdc_transform, m4, mat4) \ - X(xdc_element_pitch, v2, vec2) \ - X(sampling_frequency, f32, float) \ - X(demodulation_frequency, f32, float) \ - X(speed_of_sound, f32, float) \ - X(time_offset, f32, float) \ - X(f_number, f32, float) - -typedef alignas(16) struct { - #define X(name, type, ...) type name; - BEAMFORMER_FILTER_UBO_PARAM_LIST - #undef X - float _pad[2]; -} BeamformerFilterUBO; -static_assert((sizeof(BeamformerFilterUBO) & 15) == 0, "UBO size must be a multiple of 16"); + X(voxel_transform, m4, mat4) \ + X(xdc_transform, m4, mat4) \ + X(xdc_element_pitch, v2, vec2) typedef alignas(16) struct { #define X(name, type, ...) type name; BEAMFORMER_DAS_UBO_PARAM_LIST #undef X - float _pad[1]; + float _pad[2]; } BeamformerDASUBO; static_assert((sizeof(BeamformerDASUBO) & 15) == 0, "UBO size must be a multiple of 16"); /* TODO(rnp): need 1 UBO per filter slot */ #define BEAMFORMER_COMPUTE_UBO_LIST \ - X(DAS, BeamformerDASUBO, das) \ - X(Filter, BeamformerFilterUBO, filter) \ - X(Demodulate, BeamformerFilterUBO, demod) + X(DAS, BeamformerDASUBO, das) #define X(k, ...) BeamformerComputeUBOKind_##k, typedef enum {BEAMFORMER_COMPUTE_UBO_LIST BeamformerComputeUBOKind_Count} BeamformerComputeUBOKind; diff --git a/beamformer.meta b/beamformer.meta @@ -16,16 +16,16 @@ @Bake { - @BakeVariable(DataKind data_kind ) - @BakeVariable(DecodeMode decode_mode ) - @BakeVariable(InputChannelStride input_channel_stride ) - @BakeVariable(InputSampleStride input_sample_stride ) - @BakeVariable(InputTransmitStride input_transmit_stride ) - @BakeVariable(OutputChannelStride output_channel_stride ) - @BakeVariable(OutputSampleStride output_sample_stride ) - @BakeVariable(OutputTransmitStride output_transmit_stride) - @BakeVariable(ShaderFlags shader_flags ) - @BakeVariable(TransmitCount transmit_count ) + @BakeInt(DataKind data_kind ) + @BakeInt(DecodeMode decode_mode ) + @BakeInt(InputChannelStride input_channel_stride ) + @BakeInt(InputSampleStride input_sample_stride ) + @BakeInt(InputTransmitStride input_transmit_stride ) + @BakeInt(OutputChannelStride output_channel_stride ) + @BakeInt(OutputSampleStride output_sample_stride ) + @BakeInt(OutputTransmitStride output_transmit_stride) + @BakeInt(ShaderFlags shader_flags ) + @BakeInt(TransmitCount transmit_count ) } } @@ -37,17 +37,19 @@ @Bake { - @BakeVariable(DataKind data_kind ) - @BakeVariable(DecimationRate decimation_rate ) - @BakeVariable(FilterLength filter_length ) - @BakeVariable(InputChannelStride input_channel_stride ) - @BakeVariable(InputSampleStride input_sample_stride ) - @BakeVariable(InputTransmitStride input_transmit_stride ) - @BakeVariable(OutputChannelStride output_channel_stride ) - @BakeVariable(OutputSampleStride output_sample_stride ) - @BakeVariable(OutputTransmitStride output_transmit_stride) - @BakeVariable(ShaderFlags shader_flags ) - @BakeVariable(SamplingMode sampling_mode ) + @BakeInt(DataKind data_kind ) + @BakeInt(DecimationRate decimation_rate ) + @BakeInt(FilterLength filter_length ) + @BakeInt(InputChannelStride input_channel_stride ) + @BakeInt(InputSampleStride input_sample_stride ) + @BakeInt(InputTransmitStride input_transmit_stride ) + @BakeInt(OutputChannelStride output_channel_stride ) + @BakeInt(OutputSampleStride output_sample_stride ) + @BakeInt(OutputTransmitStride output_transmit_stride) + @BakeInt(ShaderFlags shader_flags ) + @BakeInt(SamplingMode sampling_mode ) + @BakeFloat(DemodulationFrequency demodulation_frequency) + @BakeFloat(SamplingFrequency sampling_frequency ) } @SubShader Demodulate @@ -61,12 +63,17 @@ @Bake { - @BakeVariable(AcquisitionCount acquisition_count) - @BakeVariable(ChannelCount channel_count ) - @BakeVariable(DataKind data_kind ) - @BakeVariable(SampleCount sample_count ) - @BakeVariable(ShaderFlags shader_flags ) - @BakeVariable(ShaderKind shader_kind ) + @BakeInt(AcquisitionCount acquisition_count) + @BakeInt(ChannelCount channel_count ) + @BakeInt(DataKind data_kind ) + @BakeInt(SampleCount sample_count ) + @BakeInt(ShaderFlags shader_flags ) + @BakeInt(ShaderKind shader_kind ) + @BakeFloat(DemodulationFrequency demodulation_frequency) + @BakeFloat(FNumber f_number ) + @BakeFloat(SamplingFrequency sampling_frequency ) + @BakeFloat(SpeedOfSound speed_of_sound ) + @BakeFloat(TimeOffset time_offset ) } } diff --git a/build.c b/build.c @@ -821,7 +821,8 @@ meta_end_and_write_matlab(MetaprogramContext *m, char *path) #define META_ENTRY_KIND_LIST \ X(Invalid) \ X(Bake) \ - X(BakeVariable) \ + X(BakeInt) \ + X(BakeFloat) \ X(BeginScope) \ X(EndScope) \ X(Enumeration) \ @@ -1278,6 +1279,7 @@ typedef struct { typedef struct { s8 *names_upper; s8 *names_lower; + u8 *floating_point; u32 entry_count; u32 shader_id; } MetaShaderBakeParameters; @@ -1399,19 +1401,21 @@ meta_pack_shader_bake_parameters(MetaContext *ctx, MetaEntry *e, iz entry_count, assert(last->kind == MetaEntryKind_EndScope); for (MetaEntry *row = e + 2; row != last; row++) { - if (row->kind != MetaEntryKind_BakeVariable) + if (row->kind != MetaEntryKind_BakeInt && row->kind != MetaEntryKind_BakeFloat) meta_entry_nesting_error(row, MetaEntryKind_Bake); meta_entry_argument_expected(row, s8("name"), s8("name_lower")); bp->entry_count++; } - bp->names_upper = push_array(ctx->arena, s8, bp->entry_count); - bp->names_lower = push_array(ctx->arena, s8, bp->entry_count); + bp->names_upper = push_array(ctx->arena, s8, bp->entry_count); + bp->names_lower = push_array(ctx->arena, s8, bp->entry_count); + bp->floating_point = push_array(ctx->arena, u8, bp->entry_count); u32 row_index = 0; for (MetaEntry *row = e + 2; row != last; row++, row_index++) { - bp->names_upper[row_index] = row->arguments[0].string; - bp->names_lower[row_index] = row->arguments[1].string; + bp->names_upper[row_index] = row->arguments[0].string; + bp->names_lower[row_index] = row->arguments[1].string; + bp->floating_point[row_index] = row->kind == MetaEntryKind_BakeFloat; } } @@ -1798,8 +1802,10 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) ctx->shader_names.data[s->base_name_id], s8("BakeParameters")); meta_begin_scope(m, s8("typedef union {")); meta_begin_scope(m, s8("struct {")); - for (u32 entry = 0; entry < b->entry_count; entry++) - meta_push_line(m, s8("u32 "), b->names_lower[entry], s8(";")); + for (u32 entry = 0; entry < b->entry_count; entry++) { + s8 kind = b->floating_point[entry] ? s8("f32 ") : s8("u32 "); + meta_push_line(m, kind, b->names_lower[entry], s8(";")); + } meta_end_scope(m, s8("};")); meta_begin_line(m, s8("u32 E[")); meta_push_u64(m, b->entry_count); @@ -1871,7 +1877,26 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) } meta_end_scope(m, s8("};\n")); - meta_begin_scope(m, s8("read_only global i32 beamformer_shader_bake_parameter_name_counts[] = {")); + meta_begin_scope(m, s8("read_only global u8 *beamformer_shader_bake_parameter_is_float[] = {")); + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + MetaBaseShader *bs = ctx->base_shaders.data + shader; + MetaShader *s = bs->shader; + if (bs->file.len) { + if (s->bake_parameters) { + meta_begin_line(m, s8("(u8 []){")); + for (u32 index = 0; index < s->bake_parameters->entry_count; index++) { + if (index != 0) meta_push(m, s8(", ")); + meta_push(m, s->bake_parameters->floating_point[index] ? s8("1") : s8("0")); + } + meta_end_line(m, s8("},")); + } else { + meta_push_line(m, s8("0,")); + } + } + } + meta_end_scope(m, s8("};\n")); + + meta_begin_scope(m, s8("read_only global i32 beamformer_shader_bake_parameter_counts[] = {")); for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { MetaBaseShader *bs = ctx->base_shaders.data + shader; MetaShader *s = bs->shader; diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -94,8 +94,10 @@ typedef union { u32 output_transmit_stride; u32 shader_flags; u32 sampling_mode; + f32 demodulation_frequency; + f32 sampling_frequency; }; - u32 E[11]; + u32 E[13]; } BeamformerShaderFilterBakeParameters; typedef union { @@ -106,8 +108,13 @@ typedef union { u32 sample_count; u32 shader_flags; u32 shader_kind; + f32 demodulation_frequency; + f32 f_number; + f32 sampling_frequency; + f32 speed_of_sound; + f32 time_offset; }; - u32 E[6]; + u32 E[11]; } BeamformerShaderDASBakeParameters; read_only global s8 beamformer_shader_names[] = { @@ -248,6 +255,8 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { s8_comp("OutputTransmitStride"), s8_comp("ShaderFlags"), s8_comp("SamplingMode"), + s8_comp("DemodulationFrequency"), + s8_comp("SamplingFrequency"), }, (s8 []){ s8_comp("AcquisitionCount"), @@ -256,16 +265,30 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { s8_comp("SampleCount"), s8_comp("ShaderFlags"), s8_comp("ShaderKind"), + s8_comp("DemodulationFrequency"), + s8_comp("FNumber"), + s8_comp("SamplingFrequency"), + s8_comp("SpeedOfSound"), + s8_comp("TimeOffset"), }, 0, 0, 0, }; -read_only global i32 beamformer_shader_bake_parameter_name_counts[] = { +read_only global u8 *beamformer_shader_bake_parameter_is_float[] = { + (u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + (u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, + (u8 []){0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1}, + 0, + 0, + 0, +}; + +read_only global i32 beamformer_shader_bake_parameter_counts[] = { 10, + 13, 11, - 6, 0, 0, 0, diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -58,7 +58,7 @@ layout(r8i, binding = 3) readonly restrict uniform iimage1D transmit_receive_ #if DataKind == DataKind_Float32Complex vec2 rotate_iq(const vec2 iq, const float time) { - float arg = radians(360) * demodulation_frequency * time; + float arg = radians(360) * DemodulationFrequency * time; mat2 phasor = mat2( cos(arg), sin(arg), -sin(arg), cos(arg)); vec2 result = phasor * iq; @@ -108,14 +108,14 @@ SAMPLE_TYPE sample_rf(const int channel, const int transmit, const float index) int base_index = int(channel * SampleCount * AcquisitionCount + transmit * SampleCount); if (interpolate) result *= cubic(base_index, index); else result *= rf_data[base_index + int(round(index))]; - result = rotate_iq(result, index / sampling_frequency); + result = rotate_iq(result, index / SamplingFrequency); return result; } float sample_index(const float distance) { - float time = distance / speed_of_sound + time_offset; - return time * sampling_frequency; + float time = distance / SpeedOfSound + TimeOffset; + return time * SamplingFrequency; } float apodize(const float arg) @@ -179,7 +179,7 @@ RESULT_TYPE RCA(const vec3 world_point) for (int rx_channel = 0; rx_channel < ChannelCount; rx_channel++) { vec3 rx_center = vec3(rx_channel * xdc_element_pitch, 0); vec2 receive_vector = xdc_world_point - rca_plane_projection(rx_center, rx_rows); - float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.y) * receive_vector.x); + float apodization = apodize(FNumber * radians(180) / abs(xdc_world_point.y) * receive_vector.x); if (apodization > 0) { float sidx = sample_index(transmit_distance + length(receive_vector)); @@ -210,7 +210,7 @@ RESULT_TYPE HERCULES(const vec3 world_point) if (rx_cols) element_position = vec3(rx_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0); else element_position = vec3(tx_channel, rx_channel, 0) * vec3(xdc_element_pitch, 0); - float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) * + float apodization = apodize(FNumber * radians(180) / abs(xdc_world_point.z) * distance(xdc_world_point.xy, element_position.xy)); if (apodization > 0) { /* NOTE: tribal knowledge */ @@ -234,7 +234,7 @@ RESULT_TYPE FORCES(const vec3 world_point) vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; for (int rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) { float receive_distance = distance(xdc_world_point.xz, vec2(rx_channel * xdc_element_pitch.x, 0)); - float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) * + float apodization = apodize(FNumber * radians(180) / abs(xdc_world_point.z) * (xdc_world_point.x - rx_channel * xdc_element_pitch.x)); if (apodization > 0) { for (int transmit = int(sparse); transmit < AcquisitionCount; transmit++) { diff --git a/shaders/filter.glsl b/shaders/filter.glsl @@ -61,7 +61,7 @@ vec2 rotate_iq(vec2 iq, int index) result = iq; }break; default:{ - float arg = radians(360) * demodulation_frequency * index / sampling_frequency; + float arg = radians(360) * DemodulationFrequency * index / SamplingFrequency; mat2 phasor = mat2(cos(arg), -sin(arg), sin(arg), cos(arg)); result = phasor * iq; diff --git a/ui.c b/ui.c @@ -1175,8 +1175,7 @@ add_beamformer_parameters_view(Variable *parent, BeamformerCtx *ctx) &bp->sampling_frequency, (v2){0}, 1e-6f, 0, 0, ui->font); add_beamformer_variable(ui, group, &ui->arena, s8("Demodulation Frequency:"), s8("[MHz]"), - &bp->demodulation_frequency, (v2){.y = 100e6f}, 1e-6f, 1e5f, - V_INPUT|V_TEXT|V_CAUSES_COMPUTE, ui->font); + &bp->demodulation_frequency, (v2){.y = 100e6f}, 1e-6f, 0, 0, ui->font); add_beamformer_variable(ui, group, &ui->arena, s8("Speed of Sound:"), s8("[m/s]"), &bp->speed_of_sound, (v2){.y = 1e6f}, 1.0f, 10.0f,