Commit: c9659b840470437ade0be40050d77d8c237c72b9
Parent: 56b8c6e685692d301c3340fa8eccc0d303d6e100
Author: Randy Palamar
Date: Wed, 19 Jun 2024 17:29:08 -0600
GPU hadamard decoding
Diffstat:
M | beamformer.c | | | 55 | ++++++++++++++++++++++++++++++++++++++++++++++++------- |
M | main.c | | | 45 | +++++++++++++++++++++++++++++++++++++-------- |
A | shaders/hadamard.glsl | | | 45 | +++++++++++++++++++++++++++++++++++++++++++++ |
M | util.c | | | 109 | ------------------------------------------------------------------------------- |
A | util.h | | | 114 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 244 insertions(+), 124 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -13,7 +13,7 @@
#include <GL/glext.h>
#endif
-#include "util.c"
+#include "util.h"
static void
do_compute_shader(BeamformerCtx *ctx, u32 rf_ssbo_idx, enum compute_shaders shader)
@@ -21,15 +21,48 @@ do_compute_shader(BeamformerCtx *ctx, u32 rf_ssbo_idx, enum compute_shaders shad
ComputeShaderCtx *csctx = &ctx->csctx;
glUseProgram(csctx->programs[shader]);
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[rf_ssbo_idx]);
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ctx->out_data_ssbo);
-
glUniform3uiv(csctx->rf_data_dim_id, 1, csctx->rf_data_dim.E);
glUniform3uiv(csctx->out_data_dim_id, 1, ctx->out_data_dim.E);
- glDispatchCompute(ctx->out_data_dim.x, ctx->out_data_dim.y, ctx->out_data_dim.z);
+ /* NOTE: Temporary flag for testing */
+ u32 data_idx = ctx->flags & DO_DECODE? 2 : rf_ssbo_idx;
+
+ switch (shader) {
+ case CS_HADAMARD:
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[rf_ssbo_idx]);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[2]);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->hadamard_ssbo);
+ glDispatchCompute(csctx->rf_data_dim.x, csctx->rf_data_dim.y, csctx->rf_data_dim.z);
+ break;
+ case CS_UFORCES:
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[data_idx]);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ctx->out_data_ssbo);
+ glDispatchCompute(ctx->out_data_dim.x, ctx->out_data_dim.y, ctx->out_data_dim.z);
+ break;
+ default: ASSERT(0);
+ }
}
+static void
+draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Font font)
+{
+ DrawFPS(20, 20);
+
+ u32 fontsize = 32;
+ u32 fontspace = 4;
+ s8 decode_txt = s8alloc(&arena, 64);
+ s8 compute_txt = s8alloc(&arena, 64);
+ snprintf((char *)decode_txt.data, decode_txt.len, "Decoding: %d", !!(ctx->flags & DO_DECODE));
+ snprintf((char *)compute_txt.data, compute_txt.len, "Compute: %d", !!(ctx->flags & DO_COMPUTE));
+ v2 decode_fs = {.rl = MeasureTextEx(font, (char *)decode_txt.data, fontsize, fontspace)};
+ v2 compute_fs = {.rl = MeasureTextEx(font, (char *)compute_txt.data, fontsize, fontspace)};
+ v2 dpos = {.x = 20, .y = ctx->window_size.y - decode_fs.y - compute_fs.y - fontspace};
+ DrawTextEx(font, (char *)decode_txt.data, dpos.rl, fontsize, fontspace, RED);
+ dpos.y += 2 + decode_fs.y;
+ DrawTextEx(font, (char *)compute_txt.data, dpos.rl, fontsize, fontspace, RED);
+}
+
+
DEBUG_EXPORT void
do_beamformer(BeamformerCtx *ctx, Arena arena, s8 rf_data)
{
@@ -70,7 +103,11 @@ do_beamformer(BeamformerCtx *ctx, Arena arena, s8 rf_data)
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ctx->csctx.rf_data_ssbos[!rf_ssbo_idx]);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, rf_data.len, rf_data.data);
- do_compute_shader(ctx, rf_ssbo_idx, CS_UFORCES);
+ if (ctx->flags & DO_COMPUTE) {
+ if (ctx->flags & DO_DECODE)
+ do_compute_shader(ctx, rf_ssbo_idx, CS_HADAMARD);
+ do_compute_shader(ctx, rf_ssbo_idx, CS_UFORCES);
+ }
BeginDrawing();
@@ -83,11 +120,15 @@ do_beamformer(BeamformerCtx *ctx, Arena arena, s8 rf_data)
DrawTexture(ctx->fsctx.output, 0, 0, WHITE);
EndShaderMode();
- DrawFPS(20, 20);
DrawTextEx(font, txt[txt_idx], pos.rl, 60, 6, BLACK);
+ draw_debug_overlay(ctx, arena, font);
EndDrawing();
if (IsKeyPressed(KEY_R))
ctx->flags |= RELOAD_SHADERS;
+ if (IsKeyPressed(KEY_SPACE))
+ ctx->flags ^= DO_COMPUTE;
+ if (IsKeyPressed(KEY_D))
+ ctx->flags ^= DO_DECODE;
}
diff --git a/main.c b/main.c
@@ -10,12 +10,13 @@
#include <GL/glcorearb.h>
#include <GL/glext.h>
-#include "util.c"
+#include "util.h"
#include "os_unix.c"
static char *compute_shader_paths[CS_LAST] = {
//[CS_MIN_MAX] = "shaders/min_max.glsl",
- [CS_UFORCES] = "shaders/uforces.glsl",
+ [CS_HADAMARD] = "shaders/hadamard.glsl",
+ [CS_UFORCES] = "shaders/uforces.glsl",
};
#ifndef _DEBUG
@@ -76,6 +77,27 @@ do_debug(void)
#endif /* _DEBUG */
+static void
+fill_hadamard(i32 *m, u32 dim)
+{
+ /* bit hack to check if dim is power of 2 */
+ ASSERT(!(dim & (dim - 1)) && dim);
+
+ #define IND(i, j) ((i) * dim + (j))
+ m[0] = 1;
+ for (u32 k = 1; k < dim; k *= 2) {
+ for (u32 i = 0; i < k; i++) {
+ for (u32 j = 0; j < k; j++) {
+ i32 val = m[IND(i, j)];
+ m[IND(i + k, j)] = val;
+ m[IND(i, j + k)] = val;
+ m[IND(i + k, j + k)] = -val;
+ }
+ }
+ }
+ #undef IND
+}
+
#if 0
static void
update_output_image_dimensions(BeamformerCtx *ctx, uv2 new_size)
@@ -95,7 +117,7 @@ update_output_image_dimensions(BeamformerCtx *ctx, uv2 new_size)
#endif
static void
-init_compute_shader_ctx(ComputeShaderCtx *ctx, uv3 rf_data_dim)
+init_compute_shader_ctx(ComputeShaderCtx *ctx, Arena a, uv3 rf_data_dim)
{
for (u32 i = 0; i < ARRAY_COUNT(ctx->programs); i++) {
char *shader_text = LoadFileText(compute_shader_paths[i]);
@@ -113,6 +135,13 @@ init_compute_shader_ctx(ComputeShaderCtx *ctx, uv3 rf_data_dim)
for (u32 i = 0; i < ARRAY_COUNT(ctx->rf_data_ssbos); i++)
ctx->rf_data_ssbos[i] = rlLoadShaderBuffer(rf_data_size, NULL, GL_DYNAMIC_COPY);
ctx->rf_data_idx = 0;
+
+ /* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */
+ ctx->hadamard_dim = (uv2){ .x = rf_data_dim.y, .y = rf_data_dim.y };
+ size hadamard_elements = ctx->hadamard_dim.x * ctx->hadamard_dim.y;
+ i32 *hadamard = alloc(&a, i32, hadamard_elements);
+ fill_hadamard(hadamard, ctx->hadamard_dim.x);
+ ctx->hadamard_ssbo = rlLoadShaderBuffer(hadamard_elements * sizeof(i32), hadamard, GL_DYNAMIC_COPY);
}
static void
@@ -200,11 +229,11 @@ main(void)
{
BeamformerCtx ctx = {0};
- Arena temp_arena = os_new_arena(256 * MEGABYTE);
+ Arena temp_memory = os_new_arena(256 * MEGABYTE);
char *decoded_name = "/tmp/decoded.bin";
os_file_stats decoded_stats = os_get_file_stats(decoded_name);
- s8 raw_rf_data = os_read_file(&temp_arena, decoded_name, decoded_stats.filesize);
+ s8 raw_rf_data = os_read_file(&temp_memory, decoded_name, decoded_stats.filesize);
ctx.window_size = (uv2){.w = 720, .h = 720};
ctx.out_data_dim = (uv3){.w = 720, .h = 720, .d = 1};
@@ -217,7 +246,7 @@ main(void)
size out_data_size = ctx.out_data_dim.w * ctx.out_data_dim.h * ctx.out_data_dim.d * sizeof(f32);
ctx.out_data_ssbo = rlLoadShaderBuffer(out_data_size, NULL, GL_DYNAMIC_COPY);
- init_compute_shader_ctx(&ctx.csctx, (uv3){.w = 4093, .h = 128, .d = 1});
+ init_compute_shader_ctx(&ctx.csctx, temp_memory, (uv3){.w = 4093, .h = 128, .d = 1});
init_fragment_shader_ctx(&ctx.fsctx, ctx.out_data_dim);
ctx.flags |= RELOAD_SHADERS;
@@ -227,9 +256,9 @@ main(void)
if (ctx.flags & RELOAD_SHADERS) {
ctx.flags &= ~RELOAD_SHADERS;
- reload_shaders(&ctx, temp_arena);
+ reload_shaders(&ctx, temp_memory);
}
- do_beamformer(&ctx, temp_arena, raw_rf_data);
+ do_beamformer(&ctx, temp_memory, raw_rf_data);
}
}
diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl
@@ -0,0 +1,45 @@
+#version 460 core
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(std430, binding = 1) readonly restrict buffer buffer_1 {
+ float rf_data[];
+};
+
+layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
+ float out_data[];
+};
+
+layout(std430, binding = 3) readonly restrict buffer buffer_3 {
+ int hadamard[];
+};
+
+layout(location = 3) uniform uvec3 u_rf_data_dim;
+
+void main()
+{
+ /* NOTE: each invocation takes a time sample (row) and a receive channel (column).
+ * it does the dot product of that column with the equivalent row of the hadamard matrix.
+ * the result is stored to the same row, column index of the output data.
+ */
+ uint time_sample = gl_GlobalInvocationID.x;
+ uint column = gl_GlobalInvocationID.y;
+
+ /* offset to get the correct column in hadamard matrix */
+ uint hoff = column * u_rf_data_dim.y;
+
+ /* TODO: make sure incoming data is organized so that stride is 1
+ * i.e. each column should be a single time sample for all channels
+ * alternatively we can tell opengl to store the rf data in row major order
+ */
+
+ /* offset to get the time sample and row in rf data */
+ uint rstride = u_rf_data_dim.x;
+ uint rfoff = column * rstride + time_sample;
+
+ /* N-D dot product */
+ float sum = 0;
+ for (int i = 0; i < u_rf_data_dim.y; i++)
+ sum += hadamard[hoff + i] * rf_data[rfoff + rstride * i];
+
+ out_data[rfoff] = float(sum);
+}
diff --git a/util.c b/util.c
@@ -1,111 +1,4 @@
/* See LICENSE for license details. */
-
-#ifndef _UTIL_C_
-#define _UTIL_C_
-
-#include <stdint.h>
-#include <stddef.h>
-
-#ifdef _DEBUG
-#define ASSERT(c) do { if (!(c)) asm("int3; nop"); } while (0);
-#define DEBUG_EXPORT
-#else
-#define ASSERT(c)
-#define DEBUG_EXPORT static
-#endif
-
-typedef uint8_t u8;
-typedef int32_t i32;
-typedef uint32_t u32;
-typedef uint32_t b32;
-typedef float f32;
-typedef double f64;
-typedef ptrdiff_t size;
-
-typedef struct { u8 *beg, *end; } Arena;
-
-typedef struct { size len; u8 *data; } s8;
-
-typedef union {
- struct { u32 x, y; };
- struct { u32 w, h; };
- u32 E[2];
-} uv2;
-
-typedef union {
- struct { u32 x, y, z; };
- struct { u32 w, h, d; };
- u32 E[3];
-} uv3;
-
-typedef union {
- struct { f32 x, y; };
- f32 E[2];
- Vector2 rl;
-} v2;
-
-typedef union {
- struct { f32 x, y, z, w; };
- struct { f32 r, g, b, a; };
- f32 E[4];
- Vector4 rl;
-} v4;
-
-enum compute_shaders {
-// CS_FORCES,
-// CS_HADAMARD_DECODE,
-// CS_HERCULES,
-// CS_MIN_MAX,
- CS_UFORCES,
- CS_LAST
-};
-
-enum program_flags {
- RELOAD_SHADERS = 1 << 0,
-};
-
-typedef struct {
- u32 programs[CS_LAST];
-
- /* need 3 storage buffers: incoming rf data, currently decoding rf data, decoded data.
- * last buffer will always be for decoded data. other two will swap everytime there
- * is new data. current operating idx is stored in rf_data_idx (0 or 1) which needs
- * to be accessed atomically
- */
- u32 rf_data_ssbos[3];
- _Atomic u32 rf_data_idx;
-
- uv3 rf_data_dim;
- i32 rf_data_dim_id;
- i32 out_data_dim_id;
-} ComputeShaderCtx;
-
-typedef struct {
- Shader shader;
- Texture2D output;
- i32 out_data_dim_id;
-} FragmentShaderCtx;
-
-typedef struct {
- uv2 window_size;
- u32 flags;
-
- Color bg, fg;
-
- u32 out_data_ssbo;
- uv3 out_data_dim;
-
- ComputeShaderCtx csctx;
- FragmentShaderCtx fsctx;
-} BeamformerCtx;
-
-#define MEGABYTE (1024ULL * 1024ULL)
-#define GIGABYTE (1024ULL * 1024ULL * 1024ULL)
-
-#define ARRAY_COUNT(a) (sizeof(a) / sizeof(*a))
-#define ABS(x) ((x) < 0 ? (-x) : (x))
-#define CLAMP(x, a, b) ((x) = (x) < (a) ? (a) : (x) > (b) ? (b) : (x))
-
static void __attribute__((noreturn))
die(char *fmt, ...)
{
@@ -147,5 +40,3 @@ s8alloc(Arena *a, size len)
{
return (s8){ .data = alloc(a, u8, len), .len = len };
}
-
-#endif /*_UTIL_C_ */
diff --git a/util.h b/util.h
@@ -0,0 +1,114 @@
+/* See LICENSE for license details. */
+#ifndef _UTIL_H_
+#define _UTIL_H_
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef _DEBUG
+#define ASSERT(c) do { if (!(c)) asm("int3; nop"); } while (0);
+#define DEBUG_EXPORT
+#else
+#define ASSERT(c)
+#define DEBUG_EXPORT static
+#endif
+
+typedef uint8_t u8;
+typedef int32_t i32;
+typedef uint32_t u32;
+typedef uint32_t b32;
+typedef float f32;
+typedef double f64;
+typedef ptrdiff_t size;
+
+typedef struct { u8 *beg, *end; } Arena;
+
+typedef struct { size len; u8 *data; } s8;
+
+typedef union {
+ struct { u32 x, y; };
+ struct { u32 w, h; };
+ u32 E[2];
+} uv2;
+
+typedef union {
+ struct { u32 x, y, z; };
+ struct { u32 w, h, d; };
+ u32 E[3];
+} uv3;
+
+typedef union {
+ struct { f32 x, y; };
+ f32 E[2];
+ Vector2 rl;
+} v2;
+
+typedef union {
+ struct { f32 x, y, z, w; };
+ struct { f32 r, g, b, a; };
+ f32 E[4];
+ Vector4 rl;
+} v4;
+
+enum compute_shaders {
+// CS_FORCES,
+ CS_HADAMARD,
+// CS_HERCULES,
+// CS_MIN_MAX,
+ CS_UFORCES,
+ CS_LAST
+};
+
+enum program_flags {
+ RELOAD_SHADERS = 1 << 0,
+ DO_COMPUTE = 1 << 1,
+ DO_DECODE = 1 << 2,
+};
+
+typedef struct {
+ u32 programs[CS_LAST];
+
+ /* NOTE: need 3 storage buffers: incoming rf, currently decoding rf, decoded.
+ * last buffer will always be for decoded data. other two will swap everytime there
+ * is new data. current operating idx is stored in rf_data_idx (0 or 1) which needs
+ * to be accessed atomically
+ */
+ u32 rf_data_ssbos[3];
+ _Atomic u32 rf_data_idx;
+
+ u32 hadamard_ssbo;
+ uv2 hadamard_dim;
+
+ uv3 rf_data_dim;
+ i32 rf_data_dim_id;
+ i32 out_data_dim_id;
+} ComputeShaderCtx;
+
+typedef struct {
+ Shader shader;
+ Texture2D output;
+ i32 out_data_dim_id;
+} FragmentShaderCtx;
+
+typedef struct {
+ uv2 window_size;
+ u32 flags;
+
+ Color bg, fg;
+
+ u32 out_data_ssbo;
+ uv3 out_data_dim;
+
+ ComputeShaderCtx csctx;
+ FragmentShaderCtx fsctx;
+} BeamformerCtx;
+
+#define MEGABYTE (1024ULL * 1024ULL)
+#define GIGABYTE (1024ULL * 1024ULL * 1024ULL)
+
+#define ARRAY_COUNT(a) (sizeof(a) / sizeof(*a))
+#define ABS(x) ((x) < 0 ? (-x) : (x))
+#define CLAMP(x, a, b) ((x) = (x) < (a) ? (a) : (x) > (b) ? (b) : (x))
+
+#include "util.c"
+#endif /*_UTIL_H_ */