Commit: 5a224469f4cc28ed30fd334bf9447a595b821f6b
Parent: 5113783f223d60f07aeff9f20d4b03fec09006ae
Author: Randy Palamar
Date: Tue, 25 Mar 2025 15:27:03 -0600
core: decode: support decoding of complex floats
Diffstat:
5 files changed, 77 insertions(+), 65 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -348,6 +348,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformFrame *frame, Compute
switch (shader) {
case CS_DECODE:
case CS_DECODE_FLOAT:
+ case CS_DECODE_FLOAT_COMPLEX:
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
glBindImageTexture(0, csctx->hadamard_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8I);
@@ -514,8 +515,10 @@ push_compute_shader_header(Arena *a, ComputeShaderID shader)
DAS_TYPES
#undef X
} break;
- case CS_DECODE_FLOAT: {
- push_s8(a, s8("#define INPUT_DATA_TYPE_FLOAT\n\n"));
+ case CS_DECODE_FLOAT:
+ case CS_DECODE_FLOAT_COMPLEX: {
+ if (shader == CS_DECODE_FLOAT) push_s8(a, s8("#define INPUT_DATA_TYPE_FLOAT\n\n"));
+ else push_s8(a, s8("#define INPUT_DATA_TYPE_FLOAT_COMPLEX\n\n"));
} /* FALLTHROUGH */
case CS_DECODE: {
#define X(type, id, pretty) push_s8(a, s8("#define DECODE_MODE_" #type " " #id "\n"));
@@ -530,7 +533,7 @@ push_compute_shader_header(Arena *a, ComputeShaderID shader)
}
static b32
-reload_compute_shader(BeamformerCtx *ctx, s8 path, ComputeShaderReloadContext *csr, Arena tmp)
+reload_compute_shader(BeamformerCtx *ctx, s8 path, s8 extra, ComputeShaderReloadContext *csr, Arena tmp)
{
ComputeShaderCtx *cs = &ctx->csctx;
b32 result = 0;
@@ -552,6 +555,7 @@ reload_compute_shader(BeamformerCtx *ctx, s8 path, ComputeShaderReloadContext *c
Stream buf = arena_stream(&tmp);
stream_append_s8(&buf, s8("loaded: "));
stream_append_s8(&buf, path);
+ stream_append_s8(&buf, extra);
stream_append_byte(&buf, '\n');
ctx->os.write_file(ctx->os.stderr, stream_to_s8(&buf));
glDeleteProgram(cs->programs[csr->shader]);
@@ -567,6 +571,7 @@ reload_compute_shader(BeamformerCtx *ctx, s8 path, ComputeShaderReloadContext *c
Stream buf = arena_stream(&tmp);
stream_append_s8(&buf, s8("failed to load: "));
stream_append_s8(&buf, path);
+ stream_append_s8(&buf, extra);
stream_append_byte(&buf, '\n');
ctx->os.write_file(ctx->os.stderr, stream_to_s8(&buf));
}
@@ -588,10 +593,13 @@ DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute)
switch (work->type) {
case BW_RELOAD_SHADER: {
ComputeShaderReloadContext *csr = work->reload_shader_ctx;
- b32 success = reload_compute_shader(ctx, csr->path, csr, arena);
+ b32 success = reload_compute_shader(ctx, csr->path, s8(""), csr, arena);
if (csr->shader == CS_DECODE) {
+ /* TODO(rnp): think of a better way of doing this */
+ csr->shader = CS_DECODE_FLOAT_COMPLEX;
+ success &= reload_compute_shader(ctx, csr->path, s8(" (F32C)"), csr, arena);
csr->shader = CS_DECODE_FLOAT;
- success &= reload_compute_shader(ctx, csr->path, csr, arena);
+ success &= reload_compute_shader(ctx, csr->path, s8(" (F32)"), csr, arena);
csr->shader = CS_DECODE;
}
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -2,14 +2,15 @@
/* X(enumarant, number, shader file name, needs header, pretty name) */
#define COMPUTE_SHADERS \
- X(CUDA_DECODE, 0, "", 0, "CUDA Decoding") \
- X(CUDA_HILBERT, 1, "", 0, "CUDA Hilbert") \
- X(DAS, 2, "das", 1, "DAS") \
- X(DECODE, 3, "decode", 1, "Decoding") \
- X(DECODE_FLOAT, 4, "", 1, "Decoding (F32)") \
- X(DEMOD, 5, "demod", 1, "Demodulation") \
- X(MIN_MAX, 6, "min_max", 0, "Min/Max") \
- X(SUM, 7, "sum", 0, "Sum")
+ X(CUDA_DECODE, 0, "", 0, "CUDA Decoding") \
+ X(CUDA_HILBERT, 1, "", 0, "CUDA Hilbert") \
+ X(DAS, 2, "das", 1, "DAS") \
+ X(DECODE, 3, "decode", 1, "Decoding") \
+ X(DECODE_FLOAT, 4, "", 1, "Decoding (F32)") \
+ X(DECODE_FLOAT_COMPLEX, 5, "", 1, "Decoding (F32C)") \
+ X(DEMOD, 6, "demod", 1, "Demodulation") \
+ X(MIN_MAX, 7, "min_max", 0, "Min/Max") \
+ X(SUM, 8, "sum", 0, "Sum")
typedef enum {
#define X(e, n, s, h, pn) CS_ ##e = n,
diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c
@@ -281,21 +281,17 @@ set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count)
return 0;
for (i32 i = 0; i < stages_count; i++) {
- switch (stages[i]) {
- case CS_CUDA_DECODE:
- case CS_CUDA_HILBERT:
- case CS_DAS:
- case CS_DEMOD:
- case CS_DECODE:
- case CS_DECODE_FLOAT:
- case CS_MIN_MAX:
- case CS_SUM:
- g_bp->compute_stages[i] = stages[i];
- break;
- default:
+ b32 valid = 0;
+ #define X(en, number, sfn, nh, pn) if (number == stages[i]) valid = 1;
+ COMPUTE_SHADERS
+ #undef X
+
+ if (!valid) {
error_msg("invalid shader stage: %d", stages[i]);
return 0;
}
+
+ g_bp->compute_stages[i] = stages[i];
}
g_bp->compute_stages_count = stages_count;
@@ -415,30 +411,25 @@ beamform_data_synchronized(char *pipe_name, char *shm_name, void *data, uv2 data
return result;
}
-b32
-beamform_data_synchronized_i16(char *pipe_name, char *shm_name, i16 *data, uv2 data_dim,
- uv4 output_points, f32 *out_data, i32 timeout_ms)
-{
- b32 result = 0;
- u64 data_size = data_dim.x * data_dim.y * sizeof(i16);
- if (data_size <= U32_MAX) {
- g_bp->raw.rf_raw_dim = data_dim;
- result = beamform_data_synchronized(pipe_name, shm_name, data, data_dim, data_size,
- output_points, out_data, timeout_ms);
- }
- return result;
+#define SYNCHRONIZED_FUNCTIONS \
+ X(i16, i16, 1) \
+ X(f32, f32, 1) \
+ X(f32_complex, f32, 2)
+
+#define X(name, type, scale) \
+b32 beamformer_data_synchronized_ ##name(char *pipe_name, char *shm_name, type *data, \
+ uv2 data_dim, uv4 output_points, f32 *out_data, \
+ i32 timeout_ms) \
+{ \
+ b32 result = 0; \
+ u64 data_size = data_dim.x * data_dim.y * sizeof(type) * scale; \
+ if (data_size <= U32_MAX) { \
+ g_bp->raw.rf_raw_dim = data_dim; \
+ result = beamform_data_synchronized(pipe_name, shm_name, data, data_dim, data_size, \
+ output_points, out_data, timeout_ms); \
+ } \
+ return result; \
}
-b32
-beamform_data_synchronized_f32(char *pipe_name, char *shm_name, f32 *data, uv2 data_dim,
- uv4 output_points, f32 *out_data, i32 timeout_ms)
-{
- b32 result = 0;
- u64 data_size = data_dim.x * data_dim.y * sizeof(f32);
- if (data_size <= U32_MAX) {
- g_bp->raw.rf_raw_dim = data_dim;
- result = beamform_data_synchronized(pipe_name, shm_name, data, data_dim, data_size,
- output_points, out_data, timeout_ms);
- }
- return result;
-}
+SYNCHRONIZED_FUNCTIONS
+#undef X
diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h
@@ -40,3 +40,7 @@ LIB_FN b32 beamform_data_synchronized_i16(char *pipe_name, char *shm_name, i16 *
LIB_FN b32 beamform_data_synchronized_f32(char *pipe_name, char *shm_name, f32 *data, uv2 data_dim,
uv4 output_points, f32 *out_data, i32 timeout_ms);
+
+LIB_FN b32 beamform_data_synchronized_f32_complex(char *pipe_name, char *shm_name, f32 *data,
+ uv2 data_dim, uv4 output_points, f32 *out_data,
+ i32 timeout_ms);
diff --git a/shaders/decode.glsl b/shaders/decode.glsl
@@ -1,12 +1,18 @@
/* See LICENSE for license details. */
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
-#ifdef INPUT_DATA_TYPE_FLOAT
-#define INPUT_DATA_TYPE float
-#define RF_DATA_STEP_SCALE 1
+#if defined(INPUT_DATA_TYPE_FLOAT)
+ #define INPUT_DATA_TYPE float
+ #define RF_SAMPLES_PER_INDEX 1
+ #define RESULT_TYPE_CAST(x) vec2(x, 0)
+#elif defined(INPUT_DATA_TYPE_FLOAT_COMPLEX)
+ #define INPUT_DATA_TYPE vec2
+ #define RF_SAMPLES_PER_INDEX 1
+ #define RESULT_TYPE_CAST(x) vec2(x)
#else
-#define INPUT_DATA_TYPE int
-#define RF_DATA_STEP_SCALE 2
+ #define INPUT_DATA_TYPE int
+ #define RF_SAMPLES_PER_INDEX 2
+ #define RESULT_TYPE_CAST(x) vec2(x, 0)
#endif
layout(std430, binding = 1) readonly restrict buffer buffer_1 {
@@ -22,11 +28,13 @@ layout(r8i, binding = 0) readonly restrict uniform iimage2D hadamard;
INPUT_DATA_TYPE sample_rf_data(uint index, uint lfs)
{
INPUT_DATA_TYPE result;
- #ifdef INPUT_DATA_TYPE_FLOAT
+#if defined(INPUT_DATA_TYPE_FLOAT)
result = rf_data[index];
- #else
+#elif defined(INPUT_DATA_TYPE_FLOAT_COMPLEX)
+ result = rf_data[index];
+#else
result = (rf_data[index] << lfs) >> 16;
- #endif
+#endif
return result;
}
@@ -56,8 +64,8 @@ void main()
uint rf_off = rf_raw_dim.x * rf_channel + time_sample;
/* NOTE: rf_data index and stride considering the data is i16 not i32 */
- uint ridx = rf_off / RF_DATA_STEP_SCALE;
- uint ridx_delta = rf_stride / RF_DATA_STEP_SCALE;
+ uint ridx = rf_off / RF_SAMPLES_PER_INDEX;
+ uint ridx_delta = rf_stride / RF_SAMPLES_PER_INDEX;
/* NOTE: rf_data is i16 so each access grabs two time samples at time.
* We need to shift arithmetically (maintaining the sign) to get the
@@ -66,19 +74,19 @@ void main()
uint lfs = ((~time_sample) & 1u) * 16;
/* NOTE: Compute N-D dot product */
- float result = 0;
+ vec2 result = vec2(0);
switch (decode) {
case DECODE_MODE_NONE: {
- result = sample_rf_data(ridx + ridx_delta * acq, lfs);
+ result = RESULT_TYPE_CAST(sample_rf_data(ridx + ridx_delta * acq, lfs));
} break;
case DECODE_MODE_HADAMARD: {
- INPUT_DATA_TYPE sum = 0;
+ INPUT_DATA_TYPE sum = INPUT_DATA_TYPE(0);
for (int i = 0; i < dec_data_dim.z; i++) {
sum += imageLoad(hadamard, ivec2(i, acq)).x * sample_rf_data(ridx, lfs);
ridx += ridx_delta;
}
- result = float(sum) / float(dec_data_dim.z);
+ result = RESULT_TYPE_CAST(sum) / float(dec_data_dim.z);
} break;
}
- out_data[out_off] = vec2(result, 0);
+ out_data[out_off] = result;
}