tests: add decode - ogl_beamforming - Ultrasound Beamforming Implemented with OpenGL

Commit: ab1aa357b4c0427d168d75d1a05592de7e6e8f6f
Parent: e7cd9a0a86d4c44451cf5e85476e54cd71af96e4
Author: Randy Palamar
Date:   Fri, 20 Jun 2025 13:52:55 -0600

tests: add decode

This is useful for testing the library code without having any
data available.

Diffstat:
M build.c  | 1 +
A tests/decode.c  | 310 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 311 insertions(+), 0 deletions(-)
diff --git a/build.c b/build.c
@@ -861,6 +861,7 @@ build_tests(Arena arena)
 
 	#define TEST_PROGRAMS \
 		X("throughput", LINK_LIB("m"), LINK_LIB("zstd"), W32_DECL(LINK_LIB("Synchronization"))) \
+		X("decode", LINK_LIB("m"), W32_DECL(LINK_LIB("Synchronization"))) \
 
 	os_make_directory(OUTPUT("tests"));
 	if (!is_msvc) cmd_append(&arena, &cc, "-Wno-unused-function");
diff --git a/tests/decode.c b/tests/decode.c
@@ -0,0 +1,310 @@
+/* See LICENSE for license details. */
+#define LIB_FN function
+#include "ogl_beamformer_lib.c"
+
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Number of timed frames sent per study; equal to the number of entries in the
+ * `times` array of BeamformerComputeStatsTable. */
+#define AVERAGE_SAMPLES countof(((BeamformerComputeStatsTable *)0)->times)
+/* Used as the x component (first dimension) of the decoded data dimensions. */
+//#define RF_TIME_SAMPLES 2432
+#define RF_TIME_SAMPLES 4096
+
+/* Transmit counts exercised by the default benchmark sweep. The first entry is
+ * the one used by --loop and --once; the last entry is treated as the maximum
+ * (it sizes the test buffer and is the channel count used for --full-aperture). */
+read_only global u32 decode_transmit_counts[] = {
+	2, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 80, 96, 128, 160, 192, 256
+};
+
+/* Command line options as filled in by parse_argv(). */
+typedef struct {
+	b32 loop;          /* --loop: repeat the study for the first transmit count until interrupted */
+	b32 cuda;          /* --cuda: select the CUDA decode shader; also adds "cuda_" to dump file names */
+	b32 once;          /* --once: push parameters and send a single frame */
+	b32 dump;          /* --dump: write a stats file per transmit count into outdir */
+	b32 full_aperture; /* --full-aperture: use the largest transmit count as the channel count */
+
+	/* --warmup: number of untimed frames sent before each timed run */
+	u32 warmup_count;
+
+	/* --dump: output directory; points at the argv string, not a copy */
+	char *outdir;
+
+	/* arguments left over once option parsing stopped */
+	char **remaining;
+	i32    remaining_count;
+} Options;
+
+/* Exit request flag: set to 1 by the SIGINT handler and polled by the benchmark
+ * loops. Declared volatile sig_atomic_t because that is the object type the C
+ * standard guarantees may be written from an asynchronous signal handler. */
+global volatile sig_atomic_t g_should_exit;
+
+/* die(fmt, ...): report a fatal error and terminate.
+ * Writes "<name of the calling function>: " followed by the printf style
+ * message to stderr, then calls os_exit(1). The prefix is skipped when
+ * die_() is called directly with a null function_name. */
+#define die(...) die_((char *)__func__, __VA_ARGS__)
+function no_return void
+die_(char *function_name, char *format, ...)
+{
+	va_list args;
+
+	if (function_name) {
+		fprintf(stderr, "%s: ", function_name);
+	}
+
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+
+	os_exit(1);
+}
+
+#if OS_LINUX
+
+/* Creates directory `name` with mode 0770. The mkdir() result is not checked,
+ * so any failure (including the directory already existing) is silently ignored. */
+function void
+os_make_directory(char *name)
+{
+	mkdir(name, 0770);
+}
+
+#elif OS_WINDOWS
+
+W32(b32) CreateDirectoryA(c8 *, void *);
+
+/* Creates directory `name`, passing 0 as the second CreateDirectoryA() argument.
+ * The result is not checked, so any failure is silently ignored. */
+function void
+os_make_directory(char *name)
+{
+	CreateDirectoryA(name, 0);
+}
+
+#else
+#error Unsupported Platform
+#endif
+
+/* Cursor helpers for an (argv, argc) style pair: shift_n() moves pointer v
+ * forward by n elements and reduces count c by n; shift()/unshift() step by one
+ * element forwards/backwards. Arguments and the whole expansion are
+ * parenthesised so the macros expand to a single expression and accept
+ * arbitrary expressions for n. */
+#define shift_n(v, c, n) ((v) += (n), (c) -= (n))
+#define shift(v, c)   shift_n(v, c, 1)
+#define unshift(v, c) shift_n(v, c, -1)
+
+/* Prints the command line help to stderr via die(), which terminates the
+ * program; this function therefore does not return to its caller.
+ * argv0 is the program name shown at the start of the synopsis line. */
+function void
+usage(char *argv0)
+{
+	die("%s [--loop] [--once] [--full-aperture] [--cuda] [--warmup n] [--dump dir]\n"
+	    "    --loop:          reupload data forever\n"
+	    "    --once:          only run a single frame\n"
+	    "    --full-aperture: receive on full 256 channel aperture\n"
+	    "    --cuda:          use cuda for decoding\n"
+	    "    --warmup:        warmup with n runs\n"
+	    "    --dump:          dump output stats files to dir\n",
+	    argv0);
+}
+
+/* Parses the command line into an Options value.
+ *
+ * argv[0] is skipped. Flags are consumed until the first argument that does not
+ * start with '-'; that argument and everything after it are returned through
+ * result.remaining / result.remaining_count. An unrecognised flag prints the
+ * usage text and exits. --dump and --warmup both require a following value and
+ * die() when it is missing; the warmup value must additionally be a plain
+ * decimal number that fits in a u32. */
+function Options
+parse_argv(i32 argc, char *argv[])
+{
+	Options result = {0};
+
+	char *argv0 = argv[0];
+	shift(argv, argc);
+
+	while (argc > 0) {
+		s8 arg = c_str_to_s8(*argv);
+		shift(argv, argc);
+
+		if (s8_equal(arg, s8("--loop"))) {
+			result.loop = 1;
+		} else if (s8_equal(arg, s8("--full-aperture"))) {
+			result.full_aperture = 1;
+		} else if (s8_equal(arg, s8("--cuda"))) {
+			result.cuda = 1;
+		} else if (s8_equal(arg, s8("--dump"))) {
+			if (argc) {
+				result.outdir = *argv;
+				result.dump   = 1;
+				shift(argv, argc);
+			} else {
+				die("expected directory to dump to\n");
+			}
+		} else if (s8_equal(arg, s8("--once"))) {
+			result.once = 1;
+		} else if (s8_equal(arg, s8("--warmup"))) {
+			if (!argc) die("expected warmup run count\n");
+
+			/* NOTE: digits only; strtoull() on its own would accept a leading
+			 * sign or whitespace and wrap negative input to a huge count */
+			char *text = *argv;
+			char *end  = text;
+			unsigned long long count = strtoull(text, &end, 10);
+			b32 valid = text[0] >= '0' && text[0] <= '9' && *end == 0 && count <= 0xFFFFFFFFull;
+			if (!valid) die("invalid warmup run count: %s\n", text);
+
+			result.warmup_count = (u32)count;
+			shift(argv, argc);
+		} else if (arg.len > 0 && arg.data[0] == '-') {
+			usage(argv0);
+		} else {
+			/* NOTE: not an option; put it back so it is reported in remaining */
+			unshift(argv, argc);
+			break;
+		}
+	}
+
+	result.remaining       = argv;
+	result.remaining_count = argc;
+
+	return result;
+}
+
+/* Pushes one frame of data_size bytes from i16_data to the beamformer together
+ * with a compute request for the XZ view plane. Returns the library result; on
+ * failure the library's last error string is printed unless an exit has
+ * already been requested. */
+function b32
+send_frame(i16 *restrict i16_data, u32 data_size)
+{
+	b32 pushed = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 0);
+	if (!(pushed || g_should_exit)) {
+		printf("lib error: %s\n", beamformer_get_last_error_string());
+	}
+	return pushed;
+}
+
+/* Dimensions of the decoded data for a study:
+ * {RF_TIME_SAMPLES, channel count, transmit_count, 1}.
+ * The channel count equals transmit_count, or the last (largest) entry of
+ * decode_transmit_counts when full_aperture is set. */
+function uv4
+decoded_data_dim(u32 transmit_count, b32 full_aperture)
+{
+	u32 channel_count = transmit_count;
+	if (full_aperture)
+		channel_count = decode_transmit_counts[countof(decode_transmit_counts) - 1];
+
+	uv4 result = {{RF_TIME_SAMPLES, channel_count, transmit_count, 1}};
+	return result;
+}
+
+/* Dimensions of the raw (undecoded) data for a study: the decoded x dimension
+ * multiplied by transmit_count, by a fixed 256. */
+function uv2
+raw_data_dim(u32 transmit_count, b32 full_aperture)
+{
+	uv4 decoded = decoded_data_dim(transmit_count, full_aperture);
+	u32 raw_x   = decoded.x * transmit_count;
+
+	uv2 result = {{raw_x, 256}};
+	return result;
+}
+
+/* Size in bytes of one raw frame of i16 elements for the given transmit count. */
+function u32
+data_size_for_transmit_count(u32 transmit_count, b32 full_aperture)
+{
+	uv2 dim = raw_data_dim(transmit_count, full_aperture);
+	return (u32)(dim.x * dim.y * sizeof(i16));
+}
+
+/* Allocates the buffer that is sent as raw frame data for transmit_count.
+ * The buffer is zero filled so that the uploaded contents are defined and
+ * identical from run to run instead of being whatever the heap happened to
+ * contain. Dies when the allocation fails; the caller owns the returned memory. */
+function i16 *
+generate_test_data_for_transmit_count(u32 transmit_count, b32 full_aperture)
+{
+	uz  rf_size = data_size_for_transmit_count(transmit_count, full_aperture);
+	i16 *result = calloc(1, rf_size);
+	if (!result) die("calloc\n");
+	return result;
+}
+
+/* Writes the raw bytes of *stats to
+ * "<outdir><path separator>decode_[cuda_]<transmit_count>.bin".
+ * The "cuda_" part is present only when options->cuda is set. The path is
+ * assembled in a fixed 1024 byte stack buffer. */
+function void
+dump_stats(BeamformerComputeStatsTable *stats, Options *options, u32 transmit_count)
+{
+	char path_buffer[1024];
+	Stream path = {.data = (u8 *)path_buffer, .cap = sizeof(path_buffer)};
+
+	stream_append_s8s(&path, c_str_to_s8(options->outdir), s8(OS_PATH_SEPARATOR), s8("decode_"));
+	if (options->cuda) {
+		stream_append_s8(&path, s8("cuda_"));
+	}
+	stream_append_u64(&path, transmit_count);
+	stream_append_s8(&path, s8(".bin"));
+	/* NOTE: terminating 0 so that path_buffer can be handed over as a C string */
+	stream_append_byte(&path, 0);
+
+	s8 stats_bytes = {.len = sizeof(*stats), .data = (u8 *)stats};
+	os_write_new_file(path_buffer, stats_bytes);
+}
+
+/* Configures the beamformer for a decode-only study with transmit_count
+ * transmits: pushes the parameters (Hadamard decode mode, decoded and raw data
+ * dimensions), a fixed 256 entry channel mapping, and a single stage pipeline
+ * containing either the CUDA or the regular decode shader with Int16 input. */
+function void
+send_parameters(Options *options, u32 transmit_count)
+{
+	BeamformerParameters bp = {0};
+	bp.decode_mode    = BeamformerDecodeMode_Hadamard;
+	b32 full_aperture = options->full_aperture;
+	/* decoded dimensions map to: x -> samples, y -> channels, z -> acquisitions */
+	uv3 dec_data_dim  = decoded_data_dim(transmit_count, full_aperture).xyz;
+	bp.sample_count      = dec_data_dim.x;
+	bp.channel_count     = dec_data_dim.y;
+	bp.acquisition_count = dec_data_dim.z;
+
+	bp.raw_data_dimensions = raw_data_dim(transmit_count, full_aperture);
+	beamformer_push_parameters(&bp);
+
+	/* NOTE(rnp): use real channel mapping so that we still get ~random~ access pattern */
+	read_only local_persist i16 channel_mapping[] = {
+		217, 129, 212, 188, 255, 131, 237, 190, 241, 130, 248, 187, 219, 128, 218, 181,
+		216, 134, 247, 180, 220, 132, 238, 178, 246, 133, 240, 179, 221, 135, 239, 173,
+		231, 137, 211, 172, 222, 139, 213, 170, 249, 138, 210, 171, 223, 136, 232, 189,
+		233, 142, 209, 164, 224, 140, 214, 186, 254, 141, 208, 163, 225, 143, 215, 185,
+		230, 145, 204, 162, 226, 147, 206, 165, 229, 146, 207, 161, 227, 144, 205, 182,
+		234, 150, 203, 160, 228, 148, 201, 166, 236, 149, 200, 159, 235, 175, 202, 177,
+		242, 151, 196, 191, 243, 155, 198, 167, 245, 154, 199, 158, 244, 176, 197, 174,
+		250, 168, 195, 184, 251, 156, 193, 152, 253, 153, 192, 157, 252, 183, 194, 169,
+		102,  62,  71,   3, 100,  60,  82,   1,  78,  61,  72,   4,  64,  63, 101,  10,
+		103,  57, 107,  11,  99,  59,  81,  13,  73,  58,  79,  12,  98,  56,  80,  18,
+		 88,  54, 108,  19,  97,  52, 106,  21,  70,  53, 109,  20,  96,  55,  87,   2,
+		 86,  49, 110,  27,  95,  51, 105,   5,  65,  50, 111,  28,  94,  48, 104,   6,
+		 89,  46, 115,  29,  93,  44, 113,  26,  90,  45, 112,  30,  92,  47, 114,   9,
+		 85,  41, 116,  31,  91,  43, 118,  25,  83,  42, 119,  32,  84,  16, 117,  14,
+		 77,  40, 123,   0,  76,  36, 121,  24,  74,  37, 120,  33,  75,  15, 122,  17,
+		 69,  23, 124,   7,  68,  35, 126,  39,  66,  38, 127,  34,  67,   8, 125,  22,
+	};
+	beamformer_push_channel_mapping(channel_mapping, countof(channel_mapping));
+
+	/* the pipeline consists of the decode stage only */
+	i32 shader_stages[1];
+	if (options->cuda) shader_stages[0] = BeamformerShaderKind_CudaDecode;
+	else               shader_stages[0] = BeamformerShaderKind_Decode;
+	beamformer_push_pipeline(shader_stages, 1, BeamformerDataKind_Int16);
+	/* NOTE(review): timeout unit is not visible here, presumably milliseconds - confirm */
+	beamformer_set_global_timeout(1000);
+}
+
+/* Runs one study for transmit_count: pushes the parameters, sends
+ * options->warmup_count untimed frames and then times AVERAGE_SAMPLES frames.
+ * Returns the elapsed timer count divided by the timer frequency and by
+ * AVERAGE_SAMPLES. Both loops stop early once an exit has been requested; the
+ * divisor stays AVERAGE_SAMPLES in that case. */
+function f32
+execute_study(Options *options, u32 transmit_count, i16 *restrict data)
+{
+	send_parameters(options, transmit_count);
+	u32 frame_size = data_size_for_transmit_count(transmit_count, options->full_aperture);
+
+	u32 warmup_frames = 0;
+	while (!g_should_exit && warmup_frames < options->warmup_count) {
+		send_frame(data, frame_size);
+		warmup_frames++;
+	}
+
+	u64 start     = os_timer_count();
+	f64 frequency = os_timer_frequency();
+	u32 timed_frames = 0;
+	while (!g_should_exit && timed_frames < AVERAGE_SAMPLES) {
+		send_frame(data, frame_size);
+		timed_frames++;
+	}
+	u64 elapsed = os_timer_count() - start;
+
+	f32 result = elapsed / frequency / (f32)AVERAGE_SAMPLES;
+	return result;
+}
+
+/* Prints one result line: the transmit count, the number of averaged frames
+ * and the average `time` scaled by 1e3 for display in milliseconds. */
+function void
+print_result(u32 transmit_count, f32 time)
+{
+	u32 frame_count  = (u32)AVERAGE_SAMPLES;
+	f64 milliseconds = time * 1e3;
+	printf("decode %3u | %uF Average: %8.3f [ms]\n", transmit_count, frame_count, milliseconds);
+}
+
+/* SIGINT handler: only raises the exit flag that the benchmark loops poll. */
+function void
+sigint(i32 signal_number)
+{
+	(void)signal_number;
+	g_should_exit = 1;
+}
+
+/* Decode benchmark entry point.
+ *
+ * Parses the options (any leftover argument prints the usage text and exits),
+ * optionally creates the dump directory, installs the SIGINT handler and marks
+ * live imaging active with the save name tag "Decode Bench". One data buffer
+ * sized for the largest transmit count is shared by every run. Then one of:
+ *   --loop:  repeat the study for the first transmit count until interrupted
+ *   --once:  push parameters and send a single frame for the first transmit count
+ *   default: run the study for every entry of decode_transmit_counts, dumping
+ *            the compute timings after each one when --dump was given
+ * Live imaging is marked inactive again before returning 0. */
+extern i32
+main(i32 argc, char *argv[])
+{
+	Options options = parse_argv(argc, argv);
+
+	if (options.remaining_count)
+		usage(argv[0]);
+
+	if (options.dump) os_make_directory(options.outdir);
+
+	signal(SIGINT, sigint);
+
+	BeamformerLiveImagingParameters lip = {.active = 1, .save_enabled = 1};
+	s8 short_name = s8("Decode Bench");
+	mem_copy(lip.save_name_tag, short_name.data, (uz)short_name.len);
+	lip.save_name_tag_length = (i32)short_name.len;
+	beamformer_set_live_parameters(&lip);
+
+	u32 max_transmit_count = decode_transmit_counts[countof(decode_transmit_counts) - 1];
+	i16 *data = generate_test_data_for_transmit_count(max_transmit_count, options.full_aperture);
+	if (options.loop) {
+		for (;!g_should_exit;) {
+			u32 transmit_count = decode_transmit_counts[0];
+			f32 time = execute_study(&options, transmit_count, data);
+			if (!g_should_exit) print_result(transmit_count, time);
+		}
+	} else if (options.once) {
+		u32 transmit_count = decode_transmit_counts[0];
+		u32 data_size = data_size_for_transmit_count(transmit_count, options.full_aperture);
+		send_parameters(&options, transmit_count);
+		send_frame(data, data_size);
+	} else {
+		BeamformerComputeStatsTable stats = {0};
+		for (iz i = 0; !g_should_exit && i < countof(decode_transmit_counts); i++) {
+			u32 transmit_count = decode_transmit_counts[i];
+			f32 time = execute_study(&options, transmit_count, data);
+
+			/* NOTE: an interrupted study has no meaningful timing; stop the
+			 * sweep without dumping or printing anything for it */
+			if (g_should_exit) break;
+
+			if (options.dump) {
+				beamformer_compute_timings(&stats, 1000);
+				dump_stats(&stats, &options, transmit_count);
+			}
+			print_result(transmit_count, time);
+		}
+	}
+
+	lip.active = 0;
+	beamformer_set_live_parameters(&lip);
+
+	return 0;
+}

M	build.c	\|	1	+
A	tests/decode.c	\|	310	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++