ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: ab1aa357b4c0427d168d75d1a05592de7e6e8f6f
Parent: e7cd9a0a86d4c44451cf5e85476e54cd71af96e4
Author: Randy Palamar
Date:   Fri, 20 Jun 2025 13:52:55 -0600

tests: add decode

This is useful for testing the library code without having any
data available.

Diffstat:
M build.c        |   1 +
A tests/decode.c | 310 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 311 insertions(+), 0 deletions(-)

diff --git a/build.c b/build.c
@@ -861,6 +861,7 @@ build_tests(Arena arena)
 
 	#define TEST_PROGRAMS \
 		X("throughput", LINK_LIB("m"), LINK_LIB("zstd"), W32_DECL(LINK_LIB("Synchronization"))) \
+		X("decode", LINK_LIB("m"), W32_DECL(LINK_LIB("Synchronization"))) \
 
 	os_make_directory(OUTPUT("tests"));
 	if (!is_msvc) cmd_append(&arena, &cc, "-Wno-unused-function");
diff --git a/tests/decode.c b/tests/decode.c
@@ -0,0 +1,310 @@
+/* See LICENSE for license details. */
+#define LIB_FN function
+#include "ogl_beamformer_lib.c"
+
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+/* number of timed frames per study: the element count of the stats table's `times` array */
+#define AVERAGE_SAMPLES countof(((BeamformerComputeStatsTable *)0)->times)
+//#define RF_TIME_SAMPLES 2432
+#define RF_TIME_SAMPLES 4096
+/* transmit counts benchmarked by the default sweep, in order; the last entry is also used
+ * as the maximum when sizing the data buffer and for the --full-aperture channel dimension */
+read_only global u32 decode_transmit_counts[] = {
+	2, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 80, 96, 128, 160, 192, 256
+};
+/* command line options; filled in by parse_argv() */
+typedef struct {
+	b32 loop;          /* --loop */
+	b32 cuda;          /* --cuda */
+	b32 once;          /* --once */
+	b32 dump;          /* --dump was given; set together with outdir */
+	b32 full_aperture; /* --full-aperture */
+
+	u32 warmup_count;  /* --warmup n: untimed frames sent before each timed run */
+
+	char *outdir;      /* --dump dir: points at the argv entry, not a copy */
+
+	char **remaining;  /* argv position where option parsing stopped */
+	i32 remaining_count;
+} Options;
+/* set by the SIGINT handler below; polled by the frame loops, send_frame() and main().
+ * NOTE(review): plain b32 written from a signal handler - confirm this is acceptable here */
+global b32 g_should_exit;
+/* die(fmt, ...): print "<calling function>: " and the formatted message to stderr, then
+ * call os_exit(1). The prefix is skipped when die_() is handed a null function name. */
+#define die(...) die_((char *)__func__, __VA_ARGS__)
+function no_return void
+die_(char *function_name, char *format, ...)
+{
+	if (function_name)
+		fprintf(stderr, "%s: ", function_name);
+
+	va_list ap;
+
+	va_start(ap, format);
+	vfprintf(stderr, format, ap);
+	va_end(ap);
+
+	os_exit(1);
+}
+
+#if OS_LINUX
+/* create directory `name` with mode 0770; the result of mkdir() is not checked */
+function void
+os_make_directory(char *name)
+{
+	mkdir(name, 0770);
+}
+
+#elif OS_WINDOWS
+/* CreateDirectoryA is declared by hand here instead of through a system header */
+W32(b32) CreateDirectoryA(c8 *, void *);
+/* create directory `name`; 0 is passed as the second argument and the result is not checked */
+function void
+os_make_directory(char *name)
+{
+	CreateDirectoryA(name, 0);
+}
+
+#else
+#error Unsupported Platform
+#endif
+/* argument cursor helpers: move pointer `v` forward by n entries and reduce count `c` by n
+ * (unshift steps back by one). NOTE: the expansion is an unparenthesized comma expression,
+ * so these are only safe as a statement of their own */
+#define shift_n(v, c, n) v += n, c -= n
+#define shift(v, c) shift_n(v, c, 1)
+#define unshift(v, c) shift_n(v, c, -1)
+/* print the option summary through die(); the output is therefore prefixed with "usage: "
+ * and the call ends in os_exit(1) instead of returning */
+function void
+usage(char *argv0)
+{
+	die("%s [--loop] [--once] [--full-aperture] [--cuda] [--warmup n] [--dump dir]\n"
+	    " --loop: reupload data forever\n"
+	    " --once: only run a single frame\n"
+	    " --full-aperture: recieve on full 256 channel aperture\n"
+	    " --cuda: use cuda for decoding\n"
+	    " --warmup: warmup with n runs\n"
+	    " --dump: dump output stats files to dir\n",
+	    argv0);
+}
+/* Parse the leading options of argv (argv[0] is skipped) into an Options value.
+ * Parsing stops at the first argument that does not begin with '-'; that argument and
+ * everything after it are handed back through remaining/remaining_count.
+ * An unrecognized argument that begins with '-' goes to usage(); --dump without a
+ * directory goes to die(). */
+function Options
+parse_argv(i32 argc, char *argv[])
+{
+	Options result = {0};
+
+	char *argv0 = argv[0];
+	shift(argv, argc);
+
+	while (argc > 0) {
+		s8 arg = c_str_to_s8(*argv);
+		shift(argv, argc);
+
+		if (s8_equal(arg, s8("--loop"))) {
+			result.loop = 1;
+		} else if (s8_equal(arg, s8("--full-aperture"))) {
+			result.full_aperture = 1;
+		} else if (s8_equal(arg, s8("--cuda"))) {
+			result.cuda = 1;
+		} else if (s8_equal(arg, s8("--dump"))) {
+			if (argc) {
+				result.outdir = *argv;
+				result.dump = 1;
+				shift(argv, argc);
+			} else {
+				die("expected directory to dump to\n");
+			}
+		} else if (s8_equal(arg, s8("--once"))) {
+			result.once = 1;
+		} else if (s8_equal(arg, s8("--warmup"))) {
+			if (argc) { /* NOTE(review): a missing value is silently accepted; atoi() does not validate */
+				result.warmup_count = (u32)atoi(*argv);
+				shift(argv, argc);
+			}
+		} else if (arg.len > 0 && arg.data[0] == '-') {
+			usage(argv0);
+		} else {
+			unshift(argv, argc); /* put the unconsumed argument back before stopping */
+			break;
+		}
+	}
+
+	result.remaining = argv;
+	result.remaining_count = argc;
+
+	return result;
+}
+/* Push one frame of i16 samples with beamformer_push_data_with_compute() using the XZ view
+ * plane tag. When the call fails and no exit has been requested, the library's last error
+ * string is printed. Returns the result of the library call. */
+function b32
+send_frame(i16 *restrict i16_data, u32 data_size)
+{
+	b32 result = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 0);
+	if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string());
+	return result;
+}
+/* Dimensions that send_parameters() stores as sample_count (x), channel_count (y) and
+ * acquisition_count (z): x = RF_TIME_SAMPLES, y = transmit_count or, with full_aperture,
+ * the last entry of decode_transmit_counts, z = transmit_count; w is always 1 */
+function uv4
+decoded_data_dim(u32 transmit_count, b32 full_aperture)
+{
+	u32 max_transmits = decode_transmit_counts[countof(decode_transmit_counts) - 1];
+	uv4 result = {{RF_TIME_SAMPLES, full_aperture? max_transmits: transmit_count, transmit_count, 1}};
+	return result;
+}
+/* Raw data dimensions: x = RF_TIME_SAMPLES * transmit_count, y = 256 (hard coded).
+ * NOTE(review): only dec.x is read, so full_aperture does not change the result */
+function uv2
+raw_data_dim(u32 transmit_count, b32 full_aperture)
+{
+	uv4 dec = decoded_data_dim(transmit_count, full_aperture);
+	uv2 result = {{dec.x * transmit_count, 256}};
+	return result;
+}
+/* size in bytes of one raw frame: raw x * raw y * sizeof(i16), computed in u32 */
+function u32
+data_size_for_transmit_count(u32 transmit_count, b32 full_aperture)
+{
+	uv2 rf_dim = raw_data_dim(transmit_count, full_aperture);
+	u32 result = rf_dim.x * rf_dim.y * sizeof(i16);
+	return result;
+}
+/* Allocate a buffer big enough for one raw frame at transmit_count; die() on failure.
+ * NOTE(review): the buffer is returned exactly as malloc() left it - nothing visible here
+ * writes sample values into it, and the caller in main() never frees it */
+function i16 *
+generate_test_data_for_transmit_count(u32 transmit_count, b32 full_aperture)
+{
+	uz rf_size = data_size_for_transmit_count(transmit_count, full_aperture);
+	i16 *result = malloc(rf_size);
+	if (!result) die("malloc\n");
+	return result;
+}
+/* Write the raw bytes of *stats to "<outdir><OS_PATH_SEPARATOR>decode_[cuda_]<transmit_count>.bin".
+ * The path is built in a 1024 byte stack buffer and zero terminated before it is used.
+ * NOTE(review): what happens when outdir does not fit depends on Stream - confirm */
+function void
+dump_stats(BeamformerComputeStatsTable *stats, Options *options, u32 transmit_count)
+{
+	char path_buffer[1024];
+	Stream sb = {.data = (u8 *)path_buffer, .cap = sizeof(path_buffer)};
+	stream_append_s8s(&sb, c_str_to_s8(options->outdir), s8(OS_PATH_SEPARATOR), s8("decode_"));
+	if (options->cuda) stream_append_s8(&sb, s8("cuda_"));
+	stream_append_u64(&sb, transmit_count);
+	stream_append_s8(&sb, s8(".bin"));
+	stream_append_byte(&sb, 0);
+	os_write_new_file(path_buffer, (s8){.len = sizeof(*stats), .data = (u8 *)stats});
+}
+/* Configure the beamformer for one study: Hadamard decode mode, the dimensions for
+ * transmit_count, the channel mapping table below, a one stage pipeline (CudaDecode when
+ * options->cuda is set, Decode otherwise) on Int16 data, and a global timeout of 1000
+ * (presumably milliseconds - confirm against the library) */
+function void
+send_parameters(Options *options, u32 transmit_count)
+{
+	BeamformerParameters bp = {0};
+	bp.decode_mode = BeamformerDecodeMode_Hadamard;
+	b32 full_aperture = options->full_aperture;
+	uv3 dec_data_dim = decoded_data_dim(transmit_count, full_aperture).xyz;
+	bp.sample_count = dec_data_dim.x;
+	bp.channel_count = dec_data_dim.y;
+	bp.acquisition_count = dec_data_dim.z;
+
+	bp.raw_data_dimensions = raw_data_dim(transmit_count, full_aperture);
+	beamformer_push_parameters(&bp);
+
+	/* NOTE(rnp): use real channel mapping so that we still get ~random~ access pattern */
+	read_only local_persist i16 channel_mapping[] = {
+		217, 129, 212, 188, 255, 131, 237, 190, 241, 130, 248, 187, 219, 128, 218, 181,
+		216, 134, 247, 180, 220, 132, 238, 178, 246, 133, 240, 179, 221, 135, 239, 173,
+		231, 137, 211, 172, 222, 139, 213, 170, 249, 138, 210, 171, 223, 136, 232, 189,
+		233, 142, 209, 164, 224, 140, 214, 186, 254, 141, 208, 163, 225, 143, 215, 185,
+		230, 145, 204, 162, 226, 147, 206, 165, 229, 146, 207, 161, 227, 144, 205, 182,
+		234, 150, 203, 160, 228, 148, 201, 166, 236, 149, 200, 159, 235, 175, 202, 177,
+		242, 151, 196, 191, 243, 155, 198, 167, 245, 154, 199, 158, 244, 176, 197, 174,
+		250, 168, 195, 184, 251, 156, 193, 152, 253, 153, 192, 157, 252, 183, 194, 169,
+		102, 62, 71, 3, 100, 60, 82, 1, 78, 61, 72, 4, 64, 63, 101, 10,
+		103, 57, 107, 11, 99, 59, 81, 13, 73, 58, 79, 12, 98, 56, 80, 18,
+		88, 54, 108, 19, 97, 52, 106, 21, 70, 53, 109, 20, 96, 55, 87, 2,
+		86, 49, 110, 27, 95, 51, 105, 5, 65, 50, 111, 28, 94, 48, 104, 6,
+		89, 46, 115, 29, 93, 44, 113, 26, 90, 45, 112, 30, 92, 47, 114, 9,
+		85, 41, 116, 31, 91, 43, 118, 25, 83, 42, 119, 32, 84, 16, 117, 14,
+		77, 40, 123, 0, 76, 36, 121, 24, 74, 37, 120, 33, 75, 15, 122, 17,
+		69, 23, 124, 7, 68, 35, 126, 39, 66, 38, 127, 34, 67, 8, 125, 22,
+	};
+	beamformer_push_channel_mapping(channel_mapping, countof(channel_mapping));
+
+	i32 shader_stages[1];
+	if (options->cuda) shader_stages[0] = BeamformerShaderKind_CudaDecode;
+	else shader_stages[0] = BeamformerShaderKind_Decode;
+	beamformer_push_pipeline(shader_stages, 1, BeamformerDataKind_Int16);
+	beamformer_set_global_timeout(1000);
+}
+/* Run one study: send the parameters, push warmup_count untimed frames, then time
+ * AVERAGE_SAMPLES frames. The return value is elapsed timer counts / timer frequency /
+ * AVERAGE_SAMPLES, i.e. the mean time per frame (print_result() scales it by 1e3 and
+ * labels it ms). send_frame() results are not checked.
+ * NOTE: if g_should_exit ends the loop early the divisor is still AVERAGE_SAMPLES */
+function f32
+execute_study(Options *options, u32 transmit_count, i16 *restrict data)
+{
+	send_parameters(options, transmit_count);
+	u32 data_size = data_size_for_transmit_count(transmit_count, options->full_aperture);
+	for (u32 i = 0; !g_should_exit && i < options->warmup_count; i++)
+		send_frame(data, data_size);
+	/* timed section */
+	u64 start = os_timer_count();
+	f64 frequency = os_timer_frequency();
+	for (u32 i = 0; !g_should_exit && i < AVERAGE_SAMPLES; i++)
+		send_frame(data, data_size);
+	f32 result = (os_timer_count() - start) / frequency / (f32)AVERAGE_SAMPLES;
+
+	return result;
+}
+/* print one result row: transmit count, frame count and `time` multiplied by 1e3 for the [ms] column */
+function void
+print_result(u32 transmit_count, f32 time)
+{
+	printf("decode %3u | %uF Average: %8.3f [ms]\n", transmit_count, (u32)AVERAGE_SAMPLES, time * 1e3);
+}
+/* SIGINT handler: only raises g_should_exit; the signal number is unused */
+function void
+sigint(i32 _signo)
+{
+	g_should_exit = 1;
+}
+/* Entry point. Any argument left over after option parsing goes to usage(). With --dump the
+ * output directory is created first. The SIGINT handler is installed and live imaging is
+ * marked active with the save tag "Decode Bench". One of three modes then runs:
+ *   --loop:  repeat the study for the first transmit count until g_should_exit is set
+ *            (checked before --once, so it wins when both are given)
+ *   --once:  send parameters and a single frame for the first transmit count
+ *   default: one study per entry of decode_transmit_counts, dumping stats when --dump is set
+ * Live imaging is marked inactive again before returning 0. */
+extern i32
+main(i32 argc, char *argv[])
+{
+	Options options = parse_argv(argc, argv);
+
+	if (options.remaining_count)
+		usage(argv[0]);
+
+	if (options.dump) os_make_directory(options.outdir);
+
+	signal(SIGINT, sigint);
+
+	BeamformerLiveImagingParameters lip = {.active = 1, .save_enabled = 1};
+	s8 short_name = s8("Decode Bench");
+	mem_copy(lip.save_name_tag, short_name.data, (uz)short_name.len);
+	lip.save_name_tag_length = (i32)short_name.len;
+	beamformer_set_live_parameters(&lip);
+	/* one buffer sized for the largest transmit count is shared by every study */
+	u32 max_transmit_count = decode_transmit_counts[countof(decode_transmit_counts) - 1];
+	i16 *data = generate_test_data_for_transmit_count(max_transmit_count, options.full_aperture);
+	if (options.loop) {
+		for (;!g_should_exit;) {
+			u32 transmit_count = decode_transmit_counts[0];
+			f32 time = execute_study(&options, transmit_count, data);
+			if (!g_should_exit) print_result(transmit_count, time);
+		}
+	} else if (options.once) {
+		u32 transmit_count = decode_transmit_counts[0];
+		u32 data_size = data_size_for_transmit_count(transmit_count, options.full_aperture);
+		send_parameters(&options, transmit_count);
+		send_frame(data, data_size);
+	} else {
+		BeamformerComputeStatsTable stats = {0};
+		for (iz i = 0; i < countof(decode_transmit_counts); i++) { /* NOTE: no g_should_exit check here; the inner loops skip their frames instead */
+			u32 transmit_count = decode_transmit_counts[i];
+			f32 time = execute_study(&options, transmit_count, data);
+			if (options.dump) {
+				beamformer_compute_timings(&stats, 1000); /* 1000: presumably a timeout - confirm against the library */
+				dump_stats(&stats, &options, transmit_count);
+			}
+			if (!g_should_exit) print_result(transmit_count, time);
+		}
+	}
+	/* mark live imaging inactive again before exiting */
+	lip.active = 0;
+	beamformer_set_live_parameters(&lip);
+
+	return 0;
+}