ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: c9659b840470437ade0be40050d77d8c237c72b9
Parent: 56b8c6e685692d301c3340fa8eccc0d303d6e100
Author: Randy Palamar
Date:   Wed, 19 Jun 2024 17:29:08 -0600

GPU hadamard decoding

Diffstat:
Mbeamformer.c | 55++++++++++++++++++++++++++++++++++++++++++++++++-------
Mmain.c | 45+++++++++++++++++++++++++++++++++++++--------
Ashaders/hadamard.glsl | 45+++++++++++++++++++++++++++++++++++++++++++++
Mutil.c | 109-------------------------------------------------------------------------------
Autil.h | 114+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 244 insertions(+), 124 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -13,7 +13,7 @@ #include <GL/glext.h> #endif -#include "util.c" +#include "util.h" static void do_compute_shader(BeamformerCtx *ctx, u32 rf_ssbo_idx, enum compute_shaders shader) @@ -21,15 +21,48 @@ do_compute_shader(BeamformerCtx *ctx, u32 rf_ssbo_idx, enum compute_shaders shad ComputeShaderCtx *csctx = &ctx->csctx; glUseProgram(csctx->programs[shader]); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[rf_ssbo_idx]); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ctx->out_data_ssbo); - glUniform3uiv(csctx->rf_data_dim_id, 1, csctx->rf_data_dim.E); glUniform3uiv(csctx->out_data_dim_id, 1, ctx->out_data_dim.E); - glDispatchCompute(ctx->out_data_dim.x, ctx->out_data_dim.y, ctx->out_data_dim.z); + /* NOTE: Temporary flag for testing */ + u32 data_idx = ctx->flags & DO_DECODE? 2 : rf_ssbo_idx; + + switch (shader) { + case CS_HADAMARD: + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[rf_ssbo_idx]); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[2]); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->hadamard_ssbo); + glDispatchCompute(csctx->rf_data_dim.x, csctx->rf_data_dim.y, csctx->rf_data_dim.z); + break; + case CS_UFORCES: + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[data_idx]); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ctx->out_data_ssbo); + glDispatchCompute(ctx->out_data_dim.x, ctx->out_data_dim.y, ctx->out_data_dim.z); + break; + default: ASSERT(0); + } } +static void +draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Font font) +{ + DrawFPS(20, 20); + + u32 fontsize = 32; + u32 fontspace = 4; + s8 decode_txt = s8alloc(&arena, 64); + s8 compute_txt = s8alloc(&arena, 64); + snprintf((char *)decode_txt.data, decode_txt.len, "Decoding: %d", !!(ctx->flags & DO_DECODE)); + snprintf((char *)compute_txt.data, compute_txt.len, "Compute: %d", !!(ctx->flags & DO_COMPUTE)); + v2 decode_fs = {.rl = MeasureTextEx(font, (char *)decode_txt.data, fontsize, fontspace)}; + v2 compute_fs = {.rl = MeasureTextEx(font, (char *)compute_txt.data, fontsize, fontspace)}; + v2 dpos = {.x = 20, .y = ctx->window_size.y - decode_fs.y - compute_fs.y - fontspace}; + DrawTextEx(font, (char *)decode_txt.data, dpos.rl, fontsize, fontspace, RED); + dpos.y += 2 + decode_fs.y; + DrawTextEx(font, (char *)compute_txt.data, dpos.rl, fontsize, fontspace, RED); +} + + DEBUG_EXPORT void do_beamformer(BeamformerCtx *ctx, Arena arena, s8 rf_data) { @@ -70,7 +103,11 @@ do_beamformer(BeamformerCtx *ctx, Arena arena, s8 rf_data) glBindBuffer(GL_SHADER_STORAGE_BUFFER, ctx->csctx.rf_data_ssbos[!rf_ssbo_idx]); glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, rf_data.len, rf_data.data); - do_compute_shader(ctx, rf_ssbo_idx, CS_UFORCES); + if (ctx->flags & DO_COMPUTE) { + if (ctx->flags & DO_DECODE) + do_compute_shader(ctx, rf_ssbo_idx, CS_HADAMARD); + do_compute_shader(ctx, rf_ssbo_idx, CS_UFORCES); + } BeginDrawing(); @@ -83,11 +120,15 @@ do_beamformer(BeamformerCtx *ctx, Arena arena, s8 rf_data) DrawTexture(ctx->fsctx.output, 0, 0, WHITE); EndShaderMode(); - DrawFPS(20, 20); DrawTextEx(font, txt[txt_idx], pos.rl, 60, 6, BLACK); + draw_debug_overlay(ctx, arena, font); EndDrawing(); if (IsKeyPressed(KEY_R)) ctx->flags |= RELOAD_SHADERS; + if (IsKeyPressed(KEY_SPACE)) + ctx->flags ^= DO_COMPUTE; + if (IsKeyPressed(KEY_D)) + ctx->flags ^= DO_DECODE; } diff --git a/main.c b/main.c @@ -10,12 +10,13 @@ #include <GL/glcorearb.h> #include <GL/glext.h> -#include "util.c" +#include "util.h" #include "os_unix.c" static char *compute_shader_paths[CS_LAST] = { //[CS_MIN_MAX] = "shaders/min_max.glsl", - [CS_UFORCES] = "shaders/uforces.glsl", + [CS_HADAMARD] = "shaders/hadamard.glsl", + [CS_UFORCES] = "shaders/uforces.glsl", }; #ifndef _DEBUG @@ -76,6 +77,27 @@ do_debug(void) #endif /* _DEBUG */ +static void +fill_hadamard(i32 *m, u32 dim) +{ + /* bit hack to check if dim is power of 2 */ + ASSERT(!(dim & (dim - 1)) && dim); + + #define IND(i, j) ((i) * dim + (j)) + m[0] = 1; + for (u32 k = 1; k < dim; k *= 2) { + for (u32 i = 0; i < k; i++) { + for (u32 j = 0; j < k; j++) { + i32 val = m[IND(i, j)]; + m[IND(i + k, j)] = val; + m[IND(i, j + k)] = val; + m[IND(i + k, j + k)] = -val; + } + } + } + #undef IND +} + #if 0 static void update_output_image_dimensions(BeamformerCtx *ctx, uv2 new_size) @@ -95,7 +117,7 @@ update_output_image_dimensions(BeamformerCtx *ctx, uv2 new_size) #endif static void -init_compute_shader_ctx(ComputeShaderCtx *ctx, uv3 rf_data_dim) +init_compute_shader_ctx(ComputeShaderCtx *ctx, Arena a, uv3 rf_data_dim) { for (u32 i = 0; i < ARRAY_COUNT(ctx->programs); i++) { char *shader_text = LoadFileText(compute_shader_paths[i]); @@ -113,6 +135,13 @@ init_compute_shader_ctx(ComputeShaderCtx *ctx, uv3 rf_data_dim) for (u32 i = 0; i < ARRAY_COUNT(ctx->rf_data_ssbos); i++) ctx->rf_data_ssbos[i] = rlLoadShaderBuffer(rf_data_size, NULL, GL_DYNAMIC_COPY); ctx->rf_data_idx = 0; + + /* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */ + ctx->hadamard_dim = (uv2){ .x = rf_data_dim.y, .y = rf_data_dim.y }; + size hadamard_elements = ctx->hadamard_dim.x * ctx->hadamard_dim.y; + i32 *hadamard = alloc(&a, i32, hadamard_elements); + fill_hadamard(hadamard, ctx->hadamard_dim.x); + ctx->hadamard_ssbo = rlLoadShaderBuffer(hadamard_elements * sizeof(i32), hadamard, GL_DYNAMIC_COPY); } static void @@ -200,11 +229,11 @@ main(void) { BeamformerCtx ctx = {0}; - Arena temp_arena = os_new_arena(256 * MEGABYTE); + Arena temp_memory = os_new_arena(256 * MEGABYTE); char *decoded_name = "/tmp/decoded.bin"; os_file_stats decoded_stats = os_get_file_stats(decoded_name); - s8 raw_rf_data = os_read_file(&temp_arena, decoded_name, decoded_stats.filesize); + s8 raw_rf_data = os_read_file(&temp_memory, decoded_name, decoded_stats.filesize); ctx.window_size = (uv2){.w = 720, .h = 720}; ctx.out_data_dim = (uv3){.w = 720, .h = 720, .d = 1}; @@ -217,7 +246,7 @@ main(void) size out_data_size = ctx.out_data_dim.w * ctx.out_data_dim.h * ctx.out_data_dim.d * sizeof(f32); ctx.out_data_ssbo = rlLoadShaderBuffer(out_data_size, NULL, GL_DYNAMIC_COPY); - init_compute_shader_ctx(&ctx.csctx, (uv3){.w = 4093, .h = 128, .d = 1}); + init_compute_shader_ctx(&ctx.csctx, temp_memory, (uv3){.w = 4093, .h = 128, .d = 1}); init_fragment_shader_ctx(&ctx.fsctx, ctx.out_data_dim); ctx.flags |= RELOAD_SHADERS; @@ -227,9 +256,9 @@ main(void) if (ctx.flags & RELOAD_SHADERS) { ctx.flags &= ~RELOAD_SHADERS; - reload_shaders(&ctx, temp_arena); + reload_shaders(&ctx, temp_memory); } - do_beamformer(&ctx, temp_arena, raw_rf_data); + do_beamformer(&ctx, temp_memory, raw_rf_data); } } diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl @@ -0,0 +1,45 @@ +#version 460 core +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(std430, binding = 1) readonly restrict buffer buffer_1 { + float rf_data[]; +}; + +layout(std430, binding = 2) writeonly restrict buffer buffer_2 { + float out_data[]; +}; + +layout(std430, binding = 3) readonly restrict buffer buffer_3 { + int hadamard[]; +}; + +layout(location = 3) uniform uvec3 u_rf_data_dim; + +void main() +{ + /* NOTE: each invocation takes a time sample (row) and a receive channel (column). + * it does the dot product of that column with the equivalent row of the hadamard matrix. + * the result is stored to the same row, column index of the output data. + */ + uint time_sample = gl_GlobalInvocationID.x; + uint column = gl_GlobalInvocationID.y; + + /* offset to get the correct column in hadamard matrix */ + uint hoff = column * u_rf_data_dim.y; + + /* TODO: make sure incoming data is organized so that stride is 1 + * i.e. each column should be a single time sample for all channels + * alternatively we can tell opengl to store the rf data in row major order + */ + + /* offset to get the time sample and row in rf data */ + uint rstride = u_rf_data_dim.x; + uint rfoff = column * rstride + time_sample; + + /* N-D dot product */ + float sum = 0; + for (int i = 0; i < u_rf_data_dim.y; i++) + sum += hadamard[hoff + i] * rf_data[rfoff + rstride * i]; + + out_data[rfoff] = float(sum); +} diff --git a/util.c b/util.c @@ -1,111 +1,4 @@ /* See LICENSE for license details. */ - -#ifndef _UTIL_C_ -#define _UTIL_C_ - -#include <stdint.h> -#include <stddef.h> - -#ifdef _DEBUG -#define ASSERT(c) do { if (!(c)) asm("int3; nop"); } while (0); -#define DEBUG_EXPORT -#else -#define ASSERT(c) -#define DEBUG_EXPORT static -#endif - -typedef uint8_t u8; -typedef int32_t i32; -typedef uint32_t u32; -typedef uint32_t b32; -typedef float f32; -typedef double f64; -typedef ptrdiff_t size; - -typedef struct { u8 *beg, *end; } Arena; - -typedef struct { size len; u8 *data; } s8; - -typedef union { - struct { u32 x, y; }; - struct { u32 w, h; }; - u32 E[2]; -} uv2; - -typedef union { - struct { u32 x, y, z; }; - struct { u32 w, h, d; }; - u32 E[3]; -} uv3; - -typedef union { - struct { f32 x, y; }; - f32 E[2]; - Vector2 rl; -} v2; - -typedef union { - struct { f32 x, y, z, w; }; - struct { f32 r, g, b, a; }; - f32 E[4]; - Vector4 rl; -} v4; - -enum compute_shaders { -// CS_FORCES, -// CS_HADAMARD_DECODE, -// CS_HERCULES, -// CS_MIN_MAX, - CS_UFORCES, - CS_LAST -}; - -enum program_flags { - RELOAD_SHADERS = 1 << 0, -}; - -typedef struct { - u32 programs[CS_LAST]; - - /* need 3 storage buffers: incoming rf data, currently decoding rf data, decoded data. - * last buffer will always be for decoded data. other two will swap everytime there - * is new data. current operating idx is stored in rf_data_idx (0 or 1) which needs - * to be accessed atomically - */ - u32 rf_data_ssbos[3]; - _Atomic u32 rf_data_idx; - - uv3 rf_data_dim; - i32 rf_data_dim_id; - i32 out_data_dim_id; -} ComputeShaderCtx; - -typedef struct { - Shader shader; - Texture2D output; - i32 out_data_dim_id; -} FragmentShaderCtx; - -typedef struct { - uv2 window_size; - u32 flags; - - Color bg, fg; - - u32 out_data_ssbo; - uv3 out_data_dim; - - ComputeShaderCtx csctx; - FragmentShaderCtx fsctx; -} BeamformerCtx; - -#define MEGABYTE (1024ULL * 1024ULL) -#define GIGABYTE (1024ULL * 1024ULL * 1024ULL) - -#define ARRAY_COUNT(a) (sizeof(a) / sizeof(*a)) -#define ABS(x) ((x) < 0 ? (-x) : (x)) -#define CLAMP(x, a, b) ((x) = (x) < (a) ? (a) : (x) > (b) ? (b) : (x)) - static void __attribute__((noreturn)) die(char *fmt, ...) { @@ -147,5 +40,3 @@ s8alloc(Arena *a, size len) { return (s8){ .data = alloc(a, u8, len), .len = len }; } - -#endif /*_UTIL_C_ */ diff --git a/util.h b/util.h @@ -0,0 +1,114 @@ +/* See LICENSE for license details. */ +#ifndef _UTIL_H_ +#define _UTIL_H_ + +#include <stdint.h> +#include <stddef.h> + +#ifdef _DEBUG +#define ASSERT(c) do { if (!(c)) asm("int3; nop"); } while (0); +#define DEBUG_EXPORT +#else +#define ASSERT(c) +#define DEBUG_EXPORT static +#endif + +typedef uint8_t u8; +typedef int32_t i32; +typedef uint32_t u32; +typedef uint32_t b32; +typedef float f32; +typedef double f64; +typedef ptrdiff_t size; + +typedef struct { u8 *beg, *end; } Arena; + +typedef struct { size len; u8 *data; } s8; + +typedef union { + struct { u32 x, y; }; + struct { u32 w, h; }; + u32 E[2]; +} uv2; + +typedef union { + struct { u32 x, y, z; }; + struct { u32 w, h, d; }; + u32 E[3]; +} uv3; + +typedef union { + struct { f32 x, y; }; + f32 E[2]; + Vector2 rl; +} v2; + +typedef union { + struct { f32 x, y, z, w; }; + struct { f32 r, g, b, a; }; + f32 E[4]; + Vector4 rl; +} v4; + +enum compute_shaders { +// CS_FORCES, + CS_HADAMARD, +// CS_HERCULES, +// CS_MIN_MAX, + CS_UFORCES, + CS_LAST +}; + +enum program_flags { + RELOAD_SHADERS = 1 << 0, + DO_COMPUTE = 1 << 1, + DO_DECODE = 1 << 2, +}; + +typedef struct { + u32 programs[CS_LAST]; + + /* NOTE: need 3 storage buffers: incoming rf, currently decoding rf, decoded. + * last buffer will always be for decoded data. other two will swap everytime there + * is new data. current operating idx is stored in rf_data_idx (0 or 1) which needs + * to be accessed atomically + */ + u32 rf_data_ssbos[3]; + _Atomic u32 rf_data_idx; + + u32 hadamard_ssbo; + uv2 hadamard_dim; + + uv3 rf_data_dim; + i32 rf_data_dim_id; + i32 out_data_dim_id; +} ComputeShaderCtx; + +typedef struct { + Shader shader; + Texture2D output; + i32 out_data_dim_id; +} FragmentShaderCtx; + +typedef struct { + uv2 window_size; + u32 flags; + + Color bg, fg; + + u32 out_data_ssbo; + uv3 out_data_dim; + + ComputeShaderCtx csctx; + FragmentShaderCtx fsctx; +} BeamformerCtx; + +#define MEGABYTE (1024ULL * 1024ULL) +#define GIGABYTE (1024ULL * 1024ULL * 1024ULL) + +#define ARRAY_COUNT(a) (sizeof(a) / sizeof(*a)) +#define ABS(x) ((x) < 0 ? (-x) : (x)) +#define CLAMP(x, a, b) ((x) = (x) < (a) ? (a) : (x) > (b) ? (b) : (x)) + +#include "util.c" +#endif /*_UTIL_H_ */