ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 39b2d73eaaa13b7475bd1dc07b62040f789a6338
Parent: bdbe67bd0ab4213dbd91ff15733cb633aea6e335
Author: Randy Palamar
Date:   Fri, 12 Sep 2025 15:31:44 -0600

core/decode: fix raw RF pipeline

The beamformer now fully supports both IQ data and RF data, depending
on which shader stages are requested.

Note that only the common cases were tested; the others can be
fixed if they turn out to be broken.

Diffstat:
Mbeamformer.c | 13++++---------
Mbeamformer.meta | 7++++++-
Mbuild.c | 12+++++++++---
Mgenerated/beamformer.meta.c | 77++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mshaders/decode.glsl | 48++++++++++++++++++++++--------------------------
5 files changed, 83 insertions(+), 74 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -1,9 +1,5 @@ /* See LICENSE for license details. */ /* TODO(rnp): - * [ ]: make decode output real values for real inputs and complex values for complex inputs - * - this means that das should have a RF version and an IQ version - * - this will also flip the current hack to support demodulate after decode to - * being a hack to support CudaHilbert after decode * [ ]: measure performance of doing channel mapping in a separate shader * [ ]: BeamformWorkQueue -> BeamformerWorkQueue * [ ]: need to keep track of gpu memory in some way @@ -488,7 +484,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) decode_data_kind = BeamformerDataKind_Float32Complex; } } - match = beamformer_shader_decode_match(decode_data_kind); + i32 local_flags = 0; + if (run_cuda_hilbert) local_flags |= BeamformerShaderDecodeFlags_DilateOutput; + match = beamformer_shader_decode_match(decode_data_kind, local_flags); commit = 1; }break; case BeamformerShaderKind_Demodulate:{ @@ -674,10 +672,7 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, BEAMFORMER_COMPUTE_UBO_LIST #undef X - u32 samples = pb->parameters.sample_count; - u32 channels = pb->parameters.channel_count; - u32 acquisitions = pb->parameters.acquisition_count; - u32 decoded_data_size = (u32)(2 * sizeof(f32) * samples * channels * acquisitions); + u32 decoded_data_size = cp->rf_size; if (ctx->compute_context.ping_pong_ssbo_size < decoded_data_size) alloc_shader_storage(ctx, decoded_data_size, arena); diff --git a/beamformer.meta b/beamformer.meta @@ -1,3 +1,4 @@ +@Enumeration(DataKind [Int16 Int16Complex Float32 Float32Complex]) @Enumeration(DecodeMode [None Hadamard]) @Enumeration(RCAOrientation [Rows Columns]) @@ -9,7 +10,11 @@ @Shader(decode.glsl) Decode { @Enumeration(DecodeMode) - @Permute(DataKind [Int16 Int16Complex Float32 Float32Complex]) + @PermuteFlags([DilateOutput]) + { + @Permute(DataKind [Int16]) + } + 
@Permute(DataKind [Int16Complex Float32 Float32Complex]) } @Shader(filter.glsl) Filter diff --git a/build.c b/build.c @@ -1426,9 +1426,15 @@ meta_pack_shader_permutation(MetaContext *ctx, MetaShaderPermutation *sp, MetaSh u32 cursor = f->cursor.current; switch (e->kind) { case MetaEntryKind_PermuteFlags:{ - if (f->permutation_id == U32_MAX) - f->permutation_id = meta_commit_shader_flag(ctx, base_shader->flag_list_id, a->strings[cursor], e); - sp->local_flags[local_flag_index++] = (u8)(1u << f->permutation_id); + if (f->permutation_id == U32_MAX) { + u32 test = cursor, packed = 0; + for EachBit(test, flag) { + u32 flag_index = meta_commit_shader_flag(ctx, base_shader->flag_list_id, a->strings[flag], e); + packed |= (1u << flag_index); + } + f->permutation_id = packed; + } + sp->local_flags[local_flag_index++] = (u8)f->permutation_id; }break; case MetaEntryKind_Permute:{ if (f->permutation_id == U32_MAX) { diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -3,6 +3,14 @@ // GENERATED CODE typedef enum { + BeamformerDataKind_Int16 = 0, + BeamformerDataKind_Int16Complex = 1, + BeamformerDataKind_Float32 = 2, + BeamformerDataKind_Float32Complex = 3, + BeamformerDataKind_Count, +} BeamformerDataKind; + +typedef enum { BeamformerDecodeMode_None = 0, BeamformerDecodeMode_Hadamard = 1, BeamformerDecodeMode_Count, @@ -15,20 +23,16 @@ typedef enum { } BeamformerRCAOrientation; typedef enum { - BeamformerDataKind_Int16 = 0, - BeamformerDataKind_Int16Complex = 1, - BeamformerDataKind_Float32 = 2, - BeamformerDataKind_Float32Complex = 3, - BeamformerDataKind_Count, -} BeamformerDataKind; - -typedef enum { BeamformerSamplingMode_2X = 0, BeamformerSamplingMode_4X = 1, BeamformerSamplingMode_Count, } BeamformerSamplingMode; typedef enum { + BeamformerShaderDecodeFlags_DilateOutput = (1 << 0), +} BeamformerShaderDecodeFlags; + +typedef enum { BeamformerShaderFilterFlags_MapChannels = (1 << 0), BeamformerShaderFilterFlags_ComplexFilter = (1 << 1), 
BeamformerShaderFilterFlags_Demodulate = (1 << 2), @@ -83,10 +87,11 @@ read_only global i32 *beamformer_shader_match_vectors[] = { // CudaHilbert 0, // Decode - (i32 []){BeamformerDataKind_Int16}, - (i32 []){BeamformerDataKind_Int16Complex}, - (i32 []){BeamformerDataKind_Float32}, - (i32 []){BeamformerDataKind_Float32Complex}, + (i32 []){BeamformerDataKind_Int16, 0x00}, + (i32 []){BeamformerDataKind_Int16, 0x01}, + (i32 []){BeamformerDataKind_Int16Complex, 0x00}, + (i32 []){BeamformerDataKind_Float32, 0x00}, + (i32 []){BeamformerDataKind_Float32Complex, 0x00}, // Filter (i32 []){BeamformerDataKind_Int16Complex, 0x00}, (i32 []){BeamformerDataKind_Int16Complex, 0x01}, @@ -149,18 +154,18 @@ read_only global i32 *beamformer_shader_match_vectors[] = { // Render3D 0, }; -#define beamformer_match_vectors_count (61) +#define beamformer_match_vectors_count (62) read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = { {0, 1, 0, 0, 0}, {1, 2, 0, 0, 0}, - {2, 6, 1, 2, 0}, - {6, 18, 1, 1, 1}, - {18, 42, 2, 2, 1}, - {42, 58, 1, 2, 1}, - {58, 59, 0, 0, 0}, + {2, 7, 1, 2, 1}, + {7, 19, 1, 1, 1}, + {19, 43, 2, 2, 1}, + {43, 59, 1, 2, 1}, {59, 60, 0, 0, 0}, {60, 61, 0, 0, 0}, + {61, 62, 0, 0, 0}, }; read_only global s8 beamformer_shader_names[] = { @@ -207,6 +212,12 @@ read_only global i32 beamformer_reloadable_render_shader_info_indices[] = { read_only global s8 beamformer_shader_global_header_strings[] = { s8_comp("" + "#define DataKind_Int16 0\n" + "#define DataKind_Int16Complex 1\n" + "#define DataKind_Float32 2\n" + "#define DataKind_Float32Complex 3\n" + "\n"), + s8_comp("" "#define DecodeMode_None 0\n" "#define DecodeMode_Hadamard 1\n" "\n"), @@ -215,19 +226,15 @@ read_only global s8 beamformer_shader_global_header_strings[] = { "#define RCAOrientation_Columns 1\n" "\n"), s8_comp("" - "#define DataKind_Int16 0\n" - "#define DataKind_Int16Complex 1\n" - "#define DataKind_Float32 2\n" - "#define DataKind_Float32Complex 3\n" - "\n"), - s8_comp("" "#define 
SamplingMode_2X 0\n" "#define SamplingMode_4X 1\n" "\n"), }; read_only global s8 beamformer_shader_local_header_strings[] = { - {0}, + s8_comp("" + "#define ShaderFlags_DilateOutput (1 << 0)\n" + "\n"), s8_comp("" "#define ShaderFlags_MapChannels (1 << 0)\n" "#define ShaderFlags_ComplexFilter (1 << 1)\n" @@ -247,19 +254,19 @@ read_only global s8 beamformer_shader_local_header_strings[] = { }; read_only global s8 beamformer_shader_descriptor_header_strings[] = { + s8_comp("DataKind"), s8_comp("DecodeMode"), s8_comp("RCAOrientation"), - s8_comp("DataKind"), s8_comp("SamplingMode"), }; read_only global i32 *beamformer_shader_header_vectors[] = { 0, 0, - (i32 []){2, 0}, - (i32 []){2}, - (i32 []){2, 3}, - (i32 []){2, 1}, + (i32 []){0, 1}, + (i32 []){0}, + (i32 []){0, 3}, + (i32 []){0, 2}, 0, 0, 0, @@ -288,30 +295,30 @@ beamformer_shader_match(i32 *match_vector, i32 first_index, i32 one_past_last_in } function iz -beamformer_shader_decode_match(BeamformerDataKind a) +beamformer_shader_decode_match(BeamformerDataKind a, i32 flags) { - iz result = beamformer_shader_match((i32 []){(i32)a}, 2, 6, 1); + iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 2, 7, 2); return result; } function iz beamformer_shader_filter_match(BeamformerDataKind a, i32 flags) { - iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 6, 18, 2); + iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 7, 19, 2); return result; } function iz beamformer_shader_demodulate_match(BeamformerDataKind a, BeamformerSamplingMode b, i32 flags) { - iz result = beamformer_shader_match((i32 []){(i32)a, (i32)b, flags}, 18, 42, 3); + iz result = beamformer_shader_match((i32 []){(i32)a, (i32)b, flags}, 19, 43, 3); return result; } function iz beamformer_shader_das_match(BeamformerDataKind a, i32 flags) { - iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 42, 58, 2); + iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 43, 59, 2); return result; } diff --git 
a/shaders/decode.glsl b/shaders/decode.glsl @@ -10,33 +10,37 @@ #if DataKind == DataKind_Float32 #define INPUT_DATA_TYPE float - #define RF_SAMPLES_PER_INDEX 1 - #define RESULT_TYPE_CAST(x) vec4((x), 0, 0, 0) #define SAMPLE_DATA_TYPE float #define SAMPLE_TYPE_CAST(x) (x) #elif DataKind == DataKind_Float32Complex #define INPUT_DATA_TYPE vec2 - #define RF_SAMPLES_PER_INDEX 1 - #define RESULT_TYPE_CAST(x) vec4((x), 0, 0) #define SAMPLE_DATA_TYPE vec2 #define SAMPLE_TYPE_CAST(x) (x) #elif DataKind == DataKind_Int16Complex #define INPUT_DATA_TYPE int - #define RF_SAMPLES_PER_INDEX 1 - #define RESULT_TYPE_CAST(x) vec4((x), 0, 0) #define SAMPLE_DATA_TYPE vec2 #define SAMPLE_TYPE_CAST(x) vec2(((x) << 16) >> 16, (x) >> 16) -#else +#elif DataKind == DataKind_Int16 #define INPUT_DATA_TYPE int - #define RESULT_TYPE_CAST(x) (x) - /* NOTE(rnp): for i16 rf_data we decode 2 samples at once */ #define RF_SAMPLES_PER_INDEX 2 - #define SAMPLE_DATA_TYPE vec4 - #if defined(OUTPUT_DATA_TYPE_FLOAT) - #define SAMPLE_TYPE_CAST(x) vec4(((x) << 16) >> 16, (x) >> 16, 0, 0) + #if (ShaderFlags & ShaderFlags_DilateOutput) + #define SAMPLE_DATA_TYPE vec4 + #define SAMPLE_TYPE_CAST(x) vec4(((x) << 16) >> 16, 0, (x) >> 16, 0) #else - #define SAMPLE_TYPE_CAST(x) vec4(((x) << 16) >> 16, 0, (x) >> 16, 0) + #define SAMPLE_DATA_TYPE vec2 + #define SAMPLE_TYPE_CAST(x) vec2(((x) << 16) >> 16, (x) >> 16) + #define OUTPUT_SAMPLES_PER_INDEX 2 #endif +#else + #error unsupported data kind for Decode +#endif + +#ifndef OUTPUT_SAMPLES_PER_INDEX + #define OUTPUT_SAMPLES_PER_INDEX 1 +#endif + +#ifndef RF_SAMPLES_PER_INDEX + #define RF_SAMPLES_PER_INDEX 1 #endif layout(std430, binding = 1) readonly restrict buffer buffer_1 { @@ -48,7 +52,7 @@ layout(std430, binding = 2) writeonly restrict buffer buffer_2 { }; layout(std430, binding = 3) writeonly restrict buffer buffer_3 { - vec2 out_data[]; + SAMPLE_DATA_TYPE out_data[]; }; layout(r8i, binding = 0) readonly restrict uniform iimage2D hadamard; @@ -75,32 +79,24 @@ 
void main() out_rf_data[rf_offset + transmit] = rf_data[in_off / RF_SAMPLES_PER_INDEX]; } } else { - #if defined(OUTPUT_DATA_TYPE_FLOAT) - /* NOTE(rnp): when outputting floats do not dilate the out time sample; - * output should end up densely packed */ - time_sample = gl_GlobalInvocationID.x; - #endif if (time_sample < output_transmit_stride) { uint out_off = output_channel_stride * channel + output_transmit_stride * transmit + output_sample_stride * time_sample; - vec4 result = vec4(0); + SAMPLE_DATA_TYPE result = SAMPLE_DATA_TYPE(0); switch (decode_mode) { case DecodeMode_None:{ - result = RESULT_TYPE_CAST(sample_rf_data(rf_offset + transmit)); + result = sample_rf_data(rf_offset + transmit); }break; case DecodeMode_Hadamard:{ SAMPLE_DATA_TYPE sum = SAMPLE_DATA_TYPE(0); for (int i = 0; i < imageSize(hadamard).x; i++) sum += imageLoad(hadamard, ivec2(i, transmit)).x * sample_rf_data(rf_offset++); - result = RESULT_TYPE_CAST(sum) / float(imageSize(hadamard).x); + result = sum / float(imageSize(hadamard).x); }break; } - out_data[out_off + 0] = result.xy; - #if RF_SAMPLES_PER_INDEX == 2 && !defined(OUTPUT_DATA_TYPE_FLOAT) - out_data[out_off + 1] = result.zw; - #endif + out_data[out_off / OUTPUT_SAMPLES_PER_INDEX] = result; } } }