ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: 5a224469f4cc28ed30fd334bf9447a595b821f6b
Parent: 5113783f223d60f07aeff9f20d4b03fec09006ae
Author: Randy Palamar
Date:   Tue, 25 Mar 2025 15:27:03 -0600

core: decode: support decoding of complex floats

Diffstat:
Mbeamformer.c | 18+++++++++++++-----
Mbeamformer_parameters.h | 17+++++++++--------
Mhelpers/ogl_beamformer_lib.c | 65++++++++++++++++++++++++++++-------------------------------------
Mhelpers/ogl_beamformer_lib.h | 4++++
Mshaders/decode.glsl | 38+++++++++++++++++++++++---------------
5 files changed, 77 insertions(+), 65 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -348,6 +348,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformFrame *frame, Compute switch (shader) { case CS_DECODE: case CS_DECODE_FLOAT: + case CS_DECODE_FLOAT_COMPLEX: glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]); glBindImageTexture(0, csctx->hadamard_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8I); @@ -514,8 +515,10 @@ push_compute_shader_header(Arena *a, ComputeShaderID shader) DAS_TYPES #undef X } break; - case CS_DECODE_FLOAT: { - push_s8(a, s8("#define INPUT_DATA_TYPE_FLOAT\n\n")); + case CS_DECODE_FLOAT: + case CS_DECODE_FLOAT_COMPLEX: { + if (shader == CS_DECODE_FLOAT) push_s8(a, s8("#define INPUT_DATA_TYPE_FLOAT\n\n")); + else push_s8(a, s8("#define INPUT_DATA_TYPE_FLOAT_COMPLEX\n\n")); } /* FALLTHROUGH */ case CS_DECODE: { #define X(type, id, pretty) push_s8(a, s8("#define DECODE_MODE_" #type " " #id "\n")); @@ -530,7 +533,7 @@ push_compute_shader_header(Arena *a, ComputeShaderID shader) } static b32 -reload_compute_shader(BeamformerCtx *ctx, s8 path, ComputeShaderReloadContext *csr, Arena tmp) +reload_compute_shader(BeamformerCtx *ctx, s8 path, s8 extra, ComputeShaderReloadContext *csr, Arena tmp) { ComputeShaderCtx *cs = &ctx->csctx; b32 result = 0; @@ -552,6 +555,7 @@ reload_compute_shader(BeamformerCtx *ctx, s8 path, ComputeShaderReloadContext *c Stream buf = arena_stream(&tmp); stream_append_s8(&buf, s8("loaded: ")); stream_append_s8(&buf, path); + stream_append_s8(&buf, extra); stream_append_byte(&buf, '\n'); ctx->os.write_file(ctx->os.stderr, stream_to_s8(&buf)); glDeleteProgram(cs->programs[csr->shader]); @@ -567,6 +571,7 @@ reload_compute_shader(BeamformerCtx *ctx, s8 path, ComputeShaderReloadContext *c Stream buf = arena_stream(&tmp); stream_append_s8(&buf, s8("failed to load: ")); stream_append_s8(&buf, path); + stream_append_s8(&buf, extra); stream_append_byte(&buf, '\n'); ctx->os.write_file(ctx->os.stderr, stream_to_s8(&buf)); } @@ -588,10 +593,13 @@ DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute) switch (work->type) { case BW_RELOAD_SHADER: { ComputeShaderReloadContext *csr = work->reload_shader_ctx; - b32 success = reload_compute_shader(ctx, csr->path, csr, arena); + b32 success = reload_compute_shader(ctx, csr->path, s8(""), csr, arena); if (csr->shader == CS_DECODE) { + /* TODO(rnp): think of a better way of doing this */ + csr->shader = CS_DECODE_FLOAT_COMPLEX; + success &= reload_compute_shader(ctx, csr->path, s8(" (F32C)"), csr, arena); csr->shader = CS_DECODE_FLOAT; - success &= reload_compute_shader(ctx, csr->path, csr, arena); + success &= reload_compute_shader(ctx, csr->path, s8(" (F32)"), csr, arena); csr->shader = CS_DECODE; } diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -2,14 +2,15 @@ /* X(enumarant, number, shader file name, needs header, pretty name) */ #define COMPUTE_SHADERS \ - X(CUDA_DECODE, 0, "", 0, "CUDA Decoding") \ - X(CUDA_HILBERT, 1, "", 0, "CUDA Hilbert") \ - X(DAS, 2, "das", 1, "DAS") \ - X(DECODE, 3, "decode", 1, "Decoding") \ - X(DECODE_FLOAT, 4, "", 1, "Decoding (F32)") \ - X(DEMOD, 5, "demod", 1, "Demodulation") \ - X(MIN_MAX, 6, "min_max", 0, "Min/Max") \ - X(SUM, 7, "sum", 0, "Sum") + X(CUDA_DECODE, 0, "", 0, "CUDA Decoding") \ + X(CUDA_HILBERT, 1, "", 0, "CUDA Hilbert") \ + X(DAS, 2, "das", 1, "DAS") \ + X(DECODE, 3, "decode", 1, "Decoding") \ + X(DECODE_FLOAT, 4, "", 1, "Decoding (F32)") \ + X(DECODE_FLOAT_COMPLEX, 5, "", 1, "Decoding (F32C)") \ + X(DEMOD, 6, "demod", 1, "Demodulation") \ + X(MIN_MAX, 7, "min_max", 0, "Min/Max") \ + X(SUM, 8, "sum", 0, "Sum") typedef enum { #define X(e, n, s, h, pn) CS_ ##e = n, diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c @@ -281,21 +281,17 @@ set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count) return 0; for (i32 i = 0; i < stages_count; i++) { - switch (stages[i]) { - case CS_CUDA_DECODE: - case CS_CUDA_HILBERT: - case CS_DAS: - case CS_DEMOD: - case CS_DECODE: - case CS_DECODE_FLOAT: - case CS_MIN_MAX: - case CS_SUM: - g_bp->compute_stages[i] = stages[i]; - break; - default: + b32 valid = 0; + #define X(en, number, sfn, nh, pn) if (number == stages[i]) valid = 1; + COMPUTE_SHADERS + #undef X + + if (!valid) { error_msg("invalid shader stage: %d", stages[i]); return 0; } + + g_bp->compute_stages[i] = stages[i]; } g_bp->compute_stages_count = stages_count; @@ -415,30 +411,25 @@ beamform_data_synchronized(char *pipe_name, char *shm_name, void *data, uv2 data return result; } -b32 -beamform_data_synchronized_i16(char *pipe_name, char *shm_name, i16 *data, uv2 data_dim, - uv4 output_points, f32 *out_data, i32 timeout_ms) -{ - b32 result = 0; - u64 data_size = data_dim.x * data_dim.y * sizeof(i16); - if (data_size <= U32_MAX) { - g_bp->raw.rf_raw_dim = data_dim; - result = beamform_data_synchronized(pipe_name, shm_name, data, data_dim, data_size, - output_points, out_data, timeout_ms); - } - return result; +#define SYNCHRONIZED_FUNCTIONS \ + X(i16, i16, 1) \ + X(f32, f32, 1) \ + X(f32_complex, f32, 2) + +#define X(name, type, scale) \ +b32 beamformer_data_synchronized_ ##name(char *pipe_name, char *shm_name, type *data, \ + uv2 data_dim, uv4 output_points, f32 *out_data, \ + i32 timeout_ms) \ +{ \ + b32 result = 0; \ + u64 data_size = data_dim.x * data_dim.y * sizeof(type) * scale; \ + if (data_size <= U32_MAX) { \ + g_bp->raw.rf_raw_dim = data_dim; \ + result = beamform_data_synchronized(pipe_name, shm_name, data, data_dim, data_size, \ + output_points, out_data, timeout_ms); \ + } \ + return result; \ } -b32 -beamform_data_synchronized_f32(char *pipe_name, char *shm_name, f32 *data, uv2 data_dim, - uv4 output_points, f32 *out_data, i32 timeout_ms) -{ - b32 result = 0; - u64 data_size = data_dim.x * data_dim.y * sizeof(f32); - if (data_size <= U32_MAX) { - g_bp->raw.rf_raw_dim = data_dim; - result = beamform_data_synchronized(pipe_name, shm_name, data, data_dim, data_size, - output_points, out_data, timeout_ms); - } - return result; -} +SYNCHRONIZED_FUNCTIONS +#undef X diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h @@ -40,3 +40,7 @@ LIB_FN b32 beamform_data_synchronized_i16(char *pipe_name, char *shm_name, i16 * LIB_FN b32 beamform_data_synchronized_f32(char *pipe_name, char *shm_name, f32 *data, uv2 data_dim, uv4 output_points, f32 *out_data, i32 timeout_ms); + +LIB_FN b32 beamform_data_synchronized_f32_complex(char *pipe_name, char *shm_name, f32 *data, + uv2 data_dim, uv4 output_points, f32 *out_data, + i32 timeout_ms); diff --git a/shaders/decode.glsl b/shaders/decode.glsl @@ -1,12 +1,18 @@ /* See LICENSE for license details. */ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; -#ifdef INPUT_DATA_TYPE_FLOAT -#define INPUT_DATA_TYPE float -#define RF_DATA_STEP_SCALE 1 +#if defined(INPUT_DATA_TYPE_FLOAT) + #define INPUT_DATA_TYPE float + #define RF_SAMPLES_PER_INDEX 1 + #define RESULT_TYPE_CAST(x) vec2(x, 0) +#elif defined(INPUT_DATA_TYPE_FLOAT_COMPLEX) + #define INPUT_DATA_TYPE vec2 + #define RF_SAMPLES_PER_INDEX 1 + #define RESULT_TYPE_CAST(x) vec2(x) #else -#define INPUT_DATA_TYPE int -#define RF_DATA_STEP_SCALE 2 + #define INPUT_DATA_TYPE int + #define RF_SAMPLES_PER_INDEX 2 + #define RESULT_TYPE_CAST(x) vec2(x, 0) #endif layout(std430, binding = 1) readonly restrict buffer buffer_1 { @@ -22,11 +28,13 @@ layout(r8i, binding = 0) readonly restrict uniform iimage2D hadamard; INPUT_DATA_TYPE sample_rf_data(uint index, uint lfs) { INPUT_DATA_TYPE result; - #ifdef INPUT_DATA_TYPE_FLOAT +#if defined(INPUT_DATA_TYPE_FLOAT) result = rf_data[index]; - #else +#elif defined(INPUT_DATA_TYPE_FLOAT_COMPLEX) + result = rf_data[index]; +#else result = (rf_data[index] << lfs) >> 16; - #endif +#endif return result; } @@ -56,8 +64,8 @@ void main() uint rf_off = rf_raw_dim.x * rf_channel + time_sample; /* NOTE: rf_data index and stride considering the data is i16 not i32 */ - uint ridx = rf_off / RF_DATA_STEP_SCALE; - uint ridx_delta = rf_stride / RF_DATA_STEP_SCALE; + uint ridx = rf_off / RF_SAMPLES_PER_INDEX; + uint ridx_delta = rf_stride / RF_SAMPLES_PER_INDEX; /* NOTE: rf_data is i16 so each access grabs two time samples at time. * We need to shift arithmetically (maintaining the sign) to get the @@ -66,19 +74,19 @@ void main() uint lfs = ((~time_sample) & 1u) * 16; /* NOTE: Compute N-D dot product */ - float result = 0; + vec2 result = vec2(0); switch (decode) { case DECODE_MODE_NONE: { - result = sample_rf_data(ridx + ridx_delta * acq, lfs); + result = RESULT_TYPE_CAST(sample_rf_data(ridx + ridx_delta * acq, lfs)); } break; case DECODE_MODE_HADAMARD: { - INPUT_DATA_TYPE sum = 0; + INPUT_DATA_TYPE sum = INPUT_DATA_TYPE(0); for (int i = 0; i < dec_data_dim.z; i++) { sum += imageLoad(hadamard, ivec2(i, acq)).x * sample_rf_data(ridx, lfs); ridx += ridx_delta; } - result = float(sum) / float(dec_data_dim.z); + result = RESULT_TYPE_CAST(sum) / float(dec_data_dim.z); } break; } - out_data[out_off] = vec2(result, 0); + out_data[out_off] = result; }