Commit: f14dab12d92fc9cd8a2089cd2860ab0b135710ba
Parent: 1652db3df328b39771de42b2820a2d5b07cc1d32
Author: Randy Palamar
Date: Fri, 13 Jun 2025 14:13:01 -0600
tests: add throughput measurement tool
This has existed in my temp branch for a while but I think it's
useful enough to include as an example. I also often want to
compare throughput between master and wip and this makes that much
less annoying.
Diffstat:
4 files changed, 483 insertions(+), 13 deletions(-)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -19,7 +19,10 @@ jobs:
sudo apt update
sudo apt install libxkbcommon-dev xorg-dev
- name: Build
- run: ${{matrix.cc}} -march=native -O3 build.c -Iexternal/include -o build && ./build && ./build --debug
+ run: |
+ ${{matrix.cc}} -march=native -O3 build.c -Iexternal/include -o build && \
+ ./build --tests && \
+ ./build --debug
windows:
runs-on: windows-latest
@@ -43,4 +46,7 @@ jobs:
install: git mingw-w64-${{matrix.env}}-${{matrix.cc}}
- name: Build
- run: ${{matrix.cc}} -march=native -O3 build.c -Iexternal/include -o build && ./build && ./build --debug
+ run: |
+ ${{matrix.cc}} -march=native -O3 build.c -Iexternal/include -o build && \
+ ./build --tests && \
+ ./build --debug
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
*
+!tests
!external
!helpers
!shaders
diff --git a/build.c b/build.c
@@ -44,6 +44,8 @@
#include "os_linux.c"
+ #define W32_DECL(x)
+
#define OS_SHARED_LINK_LIB(s) "lib" s ".so"
#define OS_SHARED_LIB(s) s ".so"
#define OS_STATIC_LIB(s) s ".a"
@@ -53,6 +55,8 @@
#include "os_win32.c"
+ #define W32_DECL(x) x
+
#define OS_SHARED_LINK_LIB(s) s ".dll"
#define OS_SHARED_LIB(s) s ".dll"
#define OS_STATIC_LIB(s) s ".lib"
@@ -76,7 +80,6 @@
#define OUTPUT_DLL(name) "/LD", "/Fe:", name
#define OUTPUT_LIB(name) "/out:" OUTPUT(name)
#define OUTPUT_EXE(name) "/Fe:", name
- #define SINGLE_OBJECT(in, out) "/c", (in), "/Fo:", (out)
#define STATIC_LIBRARY_BEGIN(name) "lib", "/nologo", name
#else
#define LINK_LIB(name) "-l" name
@@ -84,7 +87,6 @@
#define OUTPUT_DLL(name) "-fPIC", "-shared", "-o", name
#define OUTPUT_LIB(name) OUTPUT(name)
#define OUTPUT_EXE(name) "-o", name
- #define SINGLE_OBJECT(in, out) "-c", (in), "-o", (out)
#define STATIC_LIBRARY_BEGIN(name) "ar", "rc", name
#endif
@@ -110,6 +112,7 @@ typedef struct {
b32 debug;
b32 generic;
b32 sanitize;
+ b32 tests;
b32 time;
} Options;
@@ -126,7 +129,7 @@ function void
build_log_base(BuildLogKind kind, char *format, va_list args)
{
#define X(t, pre) pre,
- read_only local_persist char *prefixes[BuildLogKind_Count + 1] = {BUILD_LOG_KINDS "[INVALID]"};
+ read_only local_persist char *prefixes[BuildLogKind_Count + 1] = {BUILD_LOG_KINDS "[INVALID] "};
#undef X
FILE *out = kind == BuildLogKind_Error? stderr : stdout;
fputs(prefixes[MIN(kind, BuildLogKind_Count)], out);
@@ -464,6 +467,7 @@ usage(char *argv0)
" --debug: dynamically link and build with debug symbols\n"
" --generic: compile for a generic target (x86-64-v3 or armv8 with NEON)\n"
" --sanitize: build with ASAN and UBSAN\n"
+ " --tests: also build programs in tests/\n"
" --time: print build time\n"
, argv0);
os_exit(0);
@@ -484,6 +488,8 @@ parse_options(i32 argc, char *argv[])
result.generic = 1;
} else if (s8_equal(str, s8("--sanitize"))) {
result.sanitize = 1;
+ } else if (s8_equal(str, s8("--tests"))) {
+ result.tests = 1;
} else if (s8_equal(str, s8("--time"))) {
result.time = 1;
} else {
@@ -540,6 +546,19 @@ build_shared_library(Arena a, CommandList cc, char *name, char *output, char **l
}
function b32
+cc_single_file(Arena a, CommandList cc, b32 exe, char *src, char *dest, char **tail, iz tail_count)
+{
+	/* Compile the single source file src into dest using the compiler command cc.
+	 * exe selects executable output, otherwise an object file is produced.
+	 * tail/tail_count are extra arguments appended after the output arguments.
+	 * cc is received by value; returns the result of running the command and
+	 * logs dest as a failure when that result is false. */
+	char *link_args[]    = {src, is_msvc? "/Fe:" : "-o", dest};
+	char *compile_args[] = {is_msvc? "/c" : "-c", src, is_msvc? "/Fo:" : "-o", dest};
+
+	if (exe) cmd_append_count(&a, &cc, link_args,    countof(link_args));
+	else     cmd_append_count(&a, &cc, compile_args, countof(compile_args));
+	cmd_append_count(&a, &cc, tail, tail_count);
+
+	b32 success = run_synchronous(a, &cc);
+	if (!success)
+		build_log_failure("%s", dest);
+	return success;
+}
+
+function b32
build_static_library_from_objects(Arena a, char *name, char **flags, iz flags_count, char **objects, iz count)
{
CommandList ar = {0};
@@ -556,14 +575,10 @@ function b32
build_static_library(Arena a, CommandList cc, char *name, char **deps, char **outputs, iz count)
{
/* TODO(rnp): refactor to not need outputs */
- b32 result = 0;
- b32 all_success = 1;
- for (iz i = 0; i < count; i++) {
- cmd_append(&a, &cc, SINGLE_OBJECT(deps[i], outputs[i]), (void *)0);
- all_success &= run_synchronous(a, &cc);
- cc.count -= 5;
- }
- if (all_success) result = build_static_library_from_objects(a, name, 0, 0, outputs, count);
+ b32 result = 1;
+ for (iz i = 0; i < count; i++)
+ result &= cc_single_file(a, cc, 0, deps[i], outputs[i], 0, 0);
+ if (result) result = build_static_library_from_objects(a, name, 0, 0, outputs, count);
return result;
}
@@ -652,6 +667,25 @@ build_beamformer_as_library(Arena arena, CommandList cc)
return result;
}
+function b32 /* build every program listed in TEST_PROGRAMS; result is false if any compile failed */
+build_tests(Arena arena, CommandList cc)
+{
+ #define TEST_PROGRAMS \
+ X("throughput", LINK_LIB("zstd"), W32_DECL(LINK_LIB("Synchronization"))) /* X(name, extra args...); W32_DECL keeps its argument only in the os_win32 build */
+
+ os_make_directory(OUTPUT("tests")); /* executables are written below OUTPUT("tests") */
+ cmd_append(&arena, &cc, "-Wno-unused-function", "-Ihelpers"); /* appended to the local copy of cc, shared by every test program */
+
+ b32 result = 1;
+ #define X(prog, ...) \
+ result &= cc_single_file(arena, cc, 1, "tests/" prog ".c", \
+ OUTPUT("tests/" prog), \
+ arg_list(char *, ##__VA_ARGS__));
+ TEST_PROGRAMS /* expands to one cc_single_file call (exe = 1) per entry */
+ #undef X
+ return result;
+}
+
i32
main(i32 argc, char *argv[])
{
@@ -670,6 +704,8 @@ main(i32 argc, char *argv[])
result &= build_helper_library(arena, c);
+ if (options.tests) result &= build_tests(arena, c);
+
//////////////////
// static portion
iz c_count = c.count;
diff --git a/tests/throughput.c b/tests/throughput.c
@@ -0,0 +1,427 @@
+/* See LICENSE for license details. */
+/* TODO(rnp):
+ * [ ]: for finer grained evaluation of throughput latency just queue a data upload
+ * without replacing the data.
+ */
+
+#include "ogl_beamformer_lib.c"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <zstd.h>
+
+global u32 g_output_points[4] = {512, 1, 1024, 1}; /* copied into bp.output_points by execute_study */
+global v2 g_axial_extent = {{ 10e-3, 165e-3}}; /* .x/.y become output min/max coordinate [2]; units presumably metres - TODO confirm */
+global v2 g_lateral_extent = {{-60e-3, 60e-3}}; /* .x/.y become output min/max coordinate [0]; units presumably metres - TODO confirm */
+global f32 g_f_number = 0.5; /* assigned to bp.f_number by execute_study */
+
+typedef struct { /* command line state filled in by parse_argv */
+ b32 loop; /* --loop: reupload data forever */
+ b32 cuda; /* --cuda: use cuda for decoding */
+ u32 frame_number; /* --frame n: frame of the data to use; 0 when not given */
+
+ char **remaining; /* first argv entry that was not consumed as an option */
+ i32 remaining_count; /* number of entries available at remaining */
+} Options;
+
+#define ZEMP_BP_MAGIC (uint64_t)0x5042504D455AFECAull /* little-endian byte order: CA FE 'Z' 'E' 'M' 'P' 'B' 'P' */
+typedef struct { /* version 1 parameters file; read_zemp_bp_v1 requires the file size to equal sizeof(zemp_bp_v1) */
+ u64 magic; /* must equal ZEMP_BP_MAGIC */
+ u32 version; /* must be 1 */
+ u16 decode_mode; /* copied to BeamformerParameters.decode */
+ u16 beamform_mode; /* copied to BeamformerParameters.das_shader_id */
+ u32 raw_data_dim[4]; /* copied to rf_raw_dim */
+ u32 decoded_data_dim[4]; /* copied to dec_data_dim */
+ f32 xdc_element_pitch[2];
+ f32 xdc_transform[16]; /* NOTE: column major order */
+ i16 channel_mapping[256];
+ f32 transmit_angles[256]; /* paired with focal_depths to build the focal vectors */
+ f32 focal_depths[256];
+ i16 sparse_elements[256]; /* a first entry of -1 makes execute_study fill in 0..255 */
+ i16 hadamard_rows[256];
+ f32 speed_of_sound;
+ f32 center_frequency;
+ f32 sampling_frequency;
+ f32 time_offset;
+ u32 transmit_mode;
+} zemp_bp_v1;
+
+/* die(fmt, ...): print "<calling function>: " followed by the formatted
+ * message to stderr and terminate the process with exit status 1. */
+#define die(...) die_((char *)__func__, __VA_ARGS__)
+function no_return void
+die_(char *function_name, char *format, ...)
+{
+	/* the prefix is skipped when no function name was supplied */
+	if (function_name) {
+		fputs(function_name, stderr);
+		fputs(": ", stderr);
+	}
+
+	va_list args;
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+
+	os_exit(1);
+}
+
+#if OS_LINUX
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+function void os_init_timer(void) { } /* nothing to cache in this build: os_get_time queries the frequency on every call */
+
+/* Current timer reading: counter ticks divided by the timer frequency. */
+function f64
+os_get_time(void)
+{
+	f64 ticks = (f64)os_get_timer_counter();
+	return ticks / os_get_timer_frequency();
+}
+
+/* Read the whole of fname into a malloc()ed buffer and return it as an s8.
+ * Any failure terminates the program through die(); on return the caller owns
+ * result.data and releases it with free(). */
+function s8
+os_read_file_simp(char *fname)
+{
+	s8 result;
+	i32 fd = open(fname, O_RDONLY);
+	if (fd < 0)
+		die("couldn't open file: %s\n", fname);
+
+	/* query the descriptor that was just opened rather than looking the
+	 * name up again: the path could refer to a different file by now */
+	struct stat st;
+	if (fstat(fd, &st) < 0)
+		die("couldn't stat file\n");
+
+	result.len = st.st_size;
+	result.data = malloc(st.st_size);
+	if (!result.data)
+		die("couldn't alloc space for reading\n");
+
+	/* read() may return fewer bytes than requested; keep going until the
+	 * file is complete, EOF is hit early, or an error is reported */
+	iz total = 0;
+	while (total < st.st_size) {
+		iz rlen = read(fd, result.data + total, st.st_size - total);
+		if (rlen <= 0)
+			break;
+		total += rlen;
+	}
+	close(fd);
+
+	if (total != st.st_size)
+		die("couldn't read file: %s\n", fname);
+
+	return result;
+}
+
+#elif OS_WINDOWS
+
+global os_w32_context os_context; /* holds timer_frequency, which os_init_timer writes and os_get_time reads */
+
+function void
+os_init_timer(void)
+{
+ os_context.timer_frequency = os_get_timer_frequency(); /* cached so os_get_time does not have to query it again */
+}
+
+/* Current timer reading: counter ticks divided by the frequency that
+ * os_init_timer stored in os_context. */
+function f64
+os_get_time(void)
+{
+	return (f64)os_get_timer_counter() / os_context.timer_frequency;
+}
+
+/* Read the whole of fname into a malloc()ed buffer and return it as an s8.
+ * Any failure terminates the program through die(); on return the caller owns
+ * result.data and releases it with free().
+ * NOTE(review): only nFileSizeLow is consulted, so files of 4GiB or more are
+ * presumably not handled - confirm against w32_file_info. */
+function s8
+os_read_file_simp(char *fname)
+{
+	s8 result;
+	iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0);
+	if (h == INVALID_FILE)
+		die("couldn't open file: %s\n", fname);
+
+	w32_file_info fileinfo;
+	if (!GetFileInformationByHandle(h, &fileinfo))
+		die("couldn't get file info\n");
+
+	result.len = fileinfo.nFileSizeLow;
+	result.data = malloc(fileinfo.nFileSizeLow);
+	if (!result.data)
+		die("couldn't alloc space for reading\n");
+
+	/* either a failed call or a short read leaves the buffer incomplete,
+	 * so each condition on its own must be treated as an error */
+	i32 rlen = 0;
+	if (!ReadFile(h, result.data, fileinfo.nFileSizeLow, &rlen, 0) || rlen != fileinfo.nFileSizeLow)
+		die("couldn't read file: %s\n", fname);
+	CloseHandle(h);
+
+	return result;
+}
+
+#else
+#error Unsupported Platform
+#endif
+
+/* Make sure the last byte written to s is `byte`, appending it when it is not
+ * already there. If there is no room left the stream's error flag is set and
+ * nothing is written. */
+function void
+stream_ensure_termination(Stream *s, u8 byte)
+{
+	b32 terminated = !s->errors && s->widx > 0 && s->data[s->widx - 1] == byte;
+	if (terminated)
+		return;
+
+	/* the append needs one free slot */
+	if (s->cap - 1 < s->widx)
+		s->errors |= 1;
+	if (!s->errors)
+		s->data[s->widx++] = byte;
+}
+
+/* Append n to s in decimal, left padded with '0' to at least min_width
+ * characters. min_width is clamped to the size of the scratch buffer. */
+function void
+stream_append_u64_width(Stream *s, u64 n, u64 min_width)
+{
+	u8  buffer[64];
+	u64 width = MIN(sizeof(buffer), min_width);
+	iz  start = sizeof(buffer);
+
+	/* digits are produced least significant first, so fill from the back */
+	do {
+		buffer[--start] = '0' + (n % 10);
+		n /= 10;
+	} while (n);
+
+	while ((u64)(sizeof(buffer) - start) < width)
+		buffer[--start] = '0';
+
+	iz count = sizeof(buffer) - start;
+	stream_append(s, buffer + start, count);
+}
+
+/* Decompress the single zstd frame in raw into a freshly malloc()ed buffer.
+ * Returns the buffer (the caller releases it with free()) or 0 when the frame
+ * does not record its content size, the header is invalid, allocation fails,
+ * or decompression does not produce exactly the advertised number of bytes. */
+function void *
+decompress_zstd_data(s8 raw)
+{
+	void *out = 0;
+
+	/* the size query reports problems through two sentinel values which
+	 * must not be handed to malloc() as if they were sizes */
+	u64 requested_size = ZSTD_getFrameContentSize(raw.data, raw.len);
+	if (requested_size != ZSTD_CONTENTSIZE_UNKNOWN && requested_size != ZSTD_CONTENTSIZE_ERROR)
+		out = malloc(requested_size);
+
+	if (out) {
+		size_t decompressed = ZSTD_decompress(out, requested_size, raw.data, raw.len);
+		if (ZSTD_isError(decompressed) || decompressed != requested_size) {
+			free(out);
+			out = 0;
+		}
+	}
+	return out;
+}
+
+/* Load the parameters file at path and validate it as a zemp_bp_v1: the file
+ * size must equal sizeof(zemp_bp_v1), the first 8 bytes must equal
+ * ZEMP_BP_MAGIC and the version field must be 1.
+ * Returns the file buffer itself (allocated by os_read_file_simp, released by
+ * the caller with free()) or 0 when validation fails. */
+function zemp_bp_v1 *
+read_zemp_bp_v1(u8 *path)
+{
+	s8 raw = os_read_file_simp((char *)path);
+	zemp_bp_v1 *result = 0;
+	if (raw.len == sizeof(zemp_bp_v1) && *(u64 *)raw.data == ZEMP_BP_MAGIC) {
+		if (((zemp_bp_v1 *)raw.data)->version == 1)
+			result = (zemp_bp_v1 *)raw.data;
+	}
+	/* a rejected file is of no further use; don't leak its buffer */
+	if (!result)
+		free(raw.data);
+	return result;
+}
+
+/* Copy the fields of a parsed parameters file into the matching members of
+ * out. Members of out without a counterpart in zbp are left untouched. */
+function void
+fill_beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out)
+{
+	/* scalar settings */
+	out->speed_of_sound     = zbp->speed_of_sound;
+	out->center_frequency   = zbp->center_frequency;
+	out->sampling_frequency = zbp->sampling_frequency;
+	out->time_offset        = zbp->time_offset;
+	out->das_shader_id      = zbp->beamform_mode;
+	out->decode             = zbp->decode_mode;
+	out->transmit_mode      = zbp->transmit_mode;
+
+	/* array members; the amount copied is the size of the destination */
+	mem_copy(out->rf_raw_dim, zbp->raw_data_dim, sizeof(out->rf_raw_dim));
+	mem_copy(out->xdc_element_pitch, zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch));
+	mem_copy(out->dec_data_dim, zbp->decoded_data_dim, sizeof(out->dec_data_dim));
+	mem_copy(out->xdc_transform, zbp->xdc_transform, sizeof(out->xdc_transform));
+}
+
+/* shift_n: advance the pointer v by n entries and lower the count c by n.
+ * shift:   the same for a single entry.
+ * Arguments and the expansion are parenthesised so that expressions can be
+ * passed and the result can be used inside a larger expression. */
+#define shift_n(v, c, n) ((v) += (n), (c) -= (n))
+#define shift(v, c) shift_n(v, c, 1)
+
+function void /* print the command line summary to stderr through die(), which exits with status 1 */
+usage(char *argv0)
+{
+ die("%s [--loop] [--cuda] [--frame n] base_path study\n"
+ " --loop: reupload data forever\n"
+ " --cuda: use cuda for decoding\n"
+ " --frame n: use frame n of the data for display\n",
+ argv0); /* argv0 fills the leading %s; die() additionally prefixes the output with this function's name */
+}
+
+/* Returns 1 when a and b have the same length and the same bytes, else 0. */
+function b32
+s8_equal(s8 a, s8 b)
+{
+	if (a.len != b.len)
+		return 0;
+
+	for (iz i = 0; i < a.len; i++) {
+		if (a.data[i] != b.data[i])
+			return 0;
+	}
+	return 1;
+}
+
+/* Parse the leading options of the command line.
+ * Recognised: --loop, --cuda and --frame n. Parsing stops at the first
+ * argument that does not start with '-'; that argument and everything after it
+ * is returned through remaining/remaining_count. An unknown option, a --frame
+ * without a value, or a --frame value that is not a non-negative decimal
+ * number prints the usage text and exits. */
+function Options
+parse_argv(i32 argc, char *argv[])
+{
+	Options result = {0};
+
+	char *argv0 = argv[0];
+	shift(argv, argc);
+
+	while (argc > 0) {
+		s8 arg = c_str_to_s8(*argv);
+
+		if (s8_equal(arg, s8("--loop"))) {
+			shift(argv, argc);
+			result.loop = 1;
+		} else if (s8_equal(arg, s8("--cuda"))) {
+			shift(argv, argc);
+			result.cuda = 1;
+		} else if (s8_equal(arg, s8("--frame"))) {
+			shift(argv, argc);
+			/* a missing value is an error, not something to skip silently */
+			if (argc <= 0)
+				usage(argv0);
+
+			/* strtoul reports where parsing stopped, which lets trailing
+			 * garbage be rejected; it would accept a leading '-' */
+			char *end = 0;
+			unsigned long frame = strtoul(*argv, &end, 10);
+			if (end == *argv || *end != 0 || **argv == '-')
+				usage(argv0);
+
+			result.frame_number = (u32)frame;
+			shift(argv, argc);
+		} else if (arg.len > 0 && arg.data[0] == '-') {
+			usage(argv0);
+		} else {
+			break;
+		}
+	}
+
+	result.remaining = argv;
+	result.remaining_count = argc;
+
+	return result;
+}
+
+/* Extend path_base with "_<index>.zst" (index zero padded to at least two
+ * digits), read that file and decompress it. Returns the decompressed buffer,
+ * which the caller releases with free(); exits through die() on failure. */
+function i16 *
+decompress_data_at_work_index(Stream *path_base, u32 index)
+{
+	stream_append_byte(path_base, '_');
+	stream_append_u64_width(path_base, index, 2);
+	stream_append_s8(path_base, s8(".zst"));
+	stream_ensure_termination(path_base, 0);
+
+	char *file_name  = (char *)path_base->data;
+	s8    compressed = os_read_file_simp(file_name);
+	i16  *samples    = decompress_zstd_data(compressed);
+
+	/* the compressed copy is not needed whether or not decoding worked */
+	free(compressed.data);
+	if (!samples)
+		die("failed to decompress data: %s\n", path_base->data);
+
+	return samples;
+}
+
+/* Push one frame of raw data to the beamformer and start a compute pass.
+ * The byte count is rf_raw_dim[0] * rf_raw_dim[1] 16 bit samples. Returns the
+ * result of starting the compute; on failure the library's last error string
+ * is printed and a false value is returned. */
+function b32
+send_frame(i16 *restrict i16_data, BeamformerParameters *restrict bp)
+{
+	u32 byte_count = bp->rf_raw_dim[0] * bp->rf_raw_dim[1] * sizeof(i16);
+
+	/* NOTE: beamformer_push_data(i16_data, byte_count, 100) was the
+	 * alternative upload call considered here */
+	b32 result = 0;
+	if (beamformer_push_data_with_compute(i16_data, byte_count, IPT_XZ, 100)) {
+		result = beamformer_start_compute(-1);
+	}
+	if (result)
+		return result;
+
+	printf("lib error: %s\n", beamformer_get_last_error_string());
+	return 0;
+}
+
+/* Load the parameters and one frame of data for `study`, upload everything to
+ * the beamformer and send the frame either once or, with options->loop,
+ * forever while printing timing statistics.
+ *
+ * study:   name of the study; used both as a directory and as a file prefix
+ * arena:   currently unused
+ * path:    stream the study name is appended to (received by value)
+ * options: parsed command line (cuda, loop, frame_number are read)
+ *
+ * In loop mode this function never returns. */
+function void
+execute_study(s8 study, Arena arena, Stream path, Options *options)
+{
+	fprintf(stderr, "showing: %.*s\n", (i32)study.len, study.data);
+
+	/* path becomes <incoming path><study><separator><study>; the write
+	 * index is remembered so other suffixes can be attached later */
+	stream_append_s8(&path, study);
+	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
+	stream_append_s8(&path, study);
+	iz path_work_index = path.widx;
+
+	stream_append_s8(&path, s8(".bp"));
+	stream_ensure_termination(&path, 0);
+
+	zemp_bp_v1 *zbp = read_zemp_bp_v1(path.data);
+	if (!zbp) die("failed to unpack parameters file\n");
+
+	BeamformerParameters bp = {0};
+	fill_beamformer_parameters_from_zemp_bp_v1(zbp, &bp);
+
+	/* output region comes from the file level globals */
+	mem_copy(bp.output_points, g_output_points, sizeof(bp.output_points));
+	bp.output_points[3] = 1;
+
+	bp.output_min_coordinate[0] = g_lateral_extent.x;
+	bp.output_min_coordinate[1] = 0;
+	bp.output_min_coordinate[2] = g_axial_extent.x;
+	bp.output_min_coordinate[3] = 0;
+
+	bp.output_max_coordinate[0] = g_lateral_extent.y;
+	bp.output_max_coordinate[1] = 0;
+	bp.output_max_coordinate[2] = g_axial_extent.y;
+	bp.output_max_coordinate[3] = 0;
+
+	bp.f_number       = g_f_number;
+	bp.beamform_plane = 0;
+	bp.interpolate    = 0;
+
+	/* a leading -1 means no sparse element list was stored: use 0..N-1 */
+	if (zbp->sparse_elements[0] == -1) {
+		for (u32 i = 0; i < countof(zbp->sparse_elements); i++)
+			zbp->sparse_elements[i] = i;
+	}
+
+	{
+		/* focal vectors are (transmit angle, focal depth) pairs */
+		align_as(64) v2 focal_vectors[countof(zbp->focal_depths)];
+		for (u32 i = 0; i < countof(zbp->focal_depths); i++)
+			focal_vectors[i] = (v2){{zbp->transmit_angles[i], zbp->focal_depths[i]}};
+		beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors), 0);
+	}
+
+	beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping), 0);
+	beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements), 0);
+	beamformer_push_parameters(&bp, 0);
+
+	free(zbp);
+
+	i32 shader_stages[16];
+	i32 shader_stage_count = 0;
+	if (options->cuda) shader_stages[shader_stage_count++] = ComputeShaderKind_CudaDecode;
+	else               shader_stages[shader_stage_count++] = ComputeShaderKind_Decode;
+	shader_stages[shader_stage_count++] = ComputeShaderKind_DASCompute;
+
+	set_beamformer_pipeline(shader_stages, shader_stage_count);
+
+	/* drop the ".bp" suffix again before building the data file name */
+	stream_reset(&path, path_work_index);
+	i16 *data = decompress_data_at_work_index(&path, options->frame_number);
+
+	if (options->loop) {
+		u32 frame  = 0;
+		u32 filled = 0; /* number of valid entries in times, saturates */
+		f32 times[32] = {0};
+		f32 data_size = bp.rf_raw_dim[0] * bp.rf_raw_dim[1] * sizeof(*data);
+		f64 start = os_get_time();
+		for (;;) {
+			if (send_frame(data, &bp)) {
+				f64 now   = os_get_time();
+				f32 delta = now - start;
+				start = now;
+
+				/* record before reporting so that the report includes
+				 * this frame and never averages an empty history */
+				times[frame % countof(times)] = delta;
+				if (filled < countof(times))
+					filled++;
+				frame++;
+
+				if ((frame % 16) == 0) {
+					/* only average the slots that hold a measurement */
+					f32 sum = 0;
+					for (u32 i = 0; i < filled; i++)
+						sum += times[i];
+					f32 average = sum / filled;
+					f32 rate    = average > 0 ? data_size / (average * (GB(1))) : 0.0f;
+					printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n",
+					       delta * 1e3, average * 1e3, rate);
+				}
+			}
+		}
+	} else {
+		send_frame(data, &bp);
+	}
+
+	free(data);
+}
+
+/* Entry point: throughput [options] base_path study
+ * Builds "<base_path><separator>" in a stream and hands it, together with the
+ * study name, to execute_study. */
+int
+main(i32 argc, char *argv[])
+{
+	Options options = parse_argv(argc, argv);
+
+	/* both positional arguments are read below, so both must be present;
+	 * with only one, remaining[1] would be the argv terminator */
+	if (options.remaining_count != 2)
+		usage(argv[0]);
+
+	os_init_timer();
+
+	Arena arena = os_alloc_arena((Arena){0}, KB(8));
+	Stream path = stream_alloc(&arena, KB(4));
+	stream_append_s8(&path, c_str_to_s8(options.remaining[0]));
+	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
+
+	execute_study(c_str_to_s8(options.remaining[1]), arena, path, &options);
+
+	return 0;
+}