ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: cbc2a53fa980b6b7d1727806a8b476a43cbc4372
Parent: 9e785e1a4b4d921d0e80b2267ecadb6014a61376
Author: Randy Palamar
Date:   Wed,  4 Jun 2025 10:17:06 -0600

build: partially support msvc

for now only the static/release build is supported

Diffstat:
M README.md | 6 +++---
M beamformer.c | 12 ++++++------
M beamformer_work_queue.c | 22 +++++++++++-----------
M build.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
M compiler.h | 6 +++---
M helpers/ogl_beamformer_lib.c | 6 +++---
M intrinsics.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M os_linux.c | 2 +-
M os_win32.c | 2 +-
M static.c | 10 +++++-----
M ui.c | 20 ++++++++++----------
M util.h | 27 +++++++++++++++------------
12 files changed, 222 insertions(+), 137 deletions(-)

diff --git a/README.md b/README.md @@ -21,6 +21,6 @@ suitable for development/debugging: ``` ### w32 -Currently the program is not expected to be buildable with `msvc`. -In order to use Windows debuggers on Windows you will need to -build with `clang`. +Currently `msvc` support is limited to the release build of the +program. PDBs will be avialable but may be of limited use. `clang` +is fully supported and recommended (and also produces better code). diff --git a/beamformer.c b/beamformer.c @@ -158,8 +158,8 @@ fill_frame_compute_work(BeamformerCtx *ctx, BeamformWork *work, ImagePlaneTag pl b32 result = 0; if (work) { result = 1; - u32 frame_id = atomic_inc_u32(&ctx->next_render_frame_index, 1); - u32 frame_index = frame_id % ARRAY_COUNT(ctx->beamform_frames); + u32 frame_id = atomic_add_u32(&ctx->next_render_frame_index, 1); + u32 frame_index = frame_id % countof(ctx->beamform_frames); work->type = BW_COMPUTE; work->frame = ctx->beamform_frames + frame_index; work->frame->ready_to_present = 0; @@ -516,7 +516,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co } } break; case BW_UPLOAD_BUFFER: { - ASSERT(!atomic_load((i32 *)(barrier_offset + work->completion_barrier))); + assert(!atomic_load_u32((i32 *)(barrier_offset + work->completion_barrier))); BeamformerUploadContext *uc = &work->upload_context; u32 tex_type, tex_format, tex_element_count, tex_1d = 0, buffer = 0; switch (uc->kind) { @@ -565,7 +565,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co } } break; case BW_COMPUTE: { - atomic_store(&cs->processing_compute, 1); + atomic_store_u32(&cs->processing_compute, 1); start_renderdoc_capture(gl_context); BeamformComputeFrame *frame = work->frame; @@ -623,7 +623,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co /* TODO(rnp): not really sure what to do here */ mem_copy(&ctx->averaged_frames[aframe_index].stats.times, &frame->stats.times, 
sizeof(frame->stats.times)); - atomic_inc_u32(&ctx->averaged_frame_index, 1); + atomic_add_u32(&ctx->averaged_frame_index, 1); } frame->ready_to_present = 1; cs->processing_compute = 0; @@ -702,7 +702,7 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) BeamformerParameters *bp = &ctx->shared_memory->parameters; if (ctx->shared_memory->dispatch_compute_sync) { ImagePlaneTag current_plane = ctx->shared_memory->current_image_plane; - atomic_store(&ctx->shared_memory->dispatch_compute_sync, 0); + atomic_store_u32(&ctx->shared_memory->dispatch_compute_sync, 0); BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); if (work) { if (fill_frame_compute_work(ctx, work, current_plane)) diff --git a/beamformer_work_queue.c b/beamformer_work_queue.c @@ -6,9 +6,9 @@ beamform_work_queue_pop(BeamformWorkQueue *q) { BeamformWork *result = 0; - static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2"); - u64 val = atomic_load(&q->queue); - u64 mask = ARRAY_COUNT(q->work_items) - 1; + static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); + u64 val = atomic_load_u64(&q->queue); + u64 mask = countof(q->work_items) - 1; u32 widx = val & mask; u32 ridx = val >> 32 & mask; @@ -21,22 +21,22 @@ beamform_work_queue_pop(BeamformWorkQueue *q) function void beamform_work_queue_pop_commit(BeamformWorkQueue *q) { - atomic_add(&q->queue, 0x100000000ULL); + atomic_add_u64(&q->queue, 0x100000000ULL); } DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push) { BeamformWork *result = 0; - static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2"); - u64 val = atomic_load(&q->queue); - u64 mask = ARRAY_COUNT(q->work_items) - 1; + static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); + u64 val = atomic_load_u64(&q->queue); + u64 mask = countof(q->work_items) - 1; u32 widx = val & mask; u32 ridx = val >> 32 & mask; u32 
next = (widx + 1) & mask; if (val & 0x80000000) - atomic_and(&q->queue, ~0x80000000); + atomic_and_u64(&q->queue, ~0x80000000); if (next != ridx) { result = q->work_items + widx; @@ -48,7 +48,7 @@ DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push) DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit) { - atomic_add(&q->queue, 1); + atomic_add_u64(&q->queue, 1); } function b32 @@ -56,8 +56,8 @@ try_wait_sync(i32 *sync, i32 timeout_ms, os_wait_on_value_fn *os_wait_on_value) { b32 result = 0; for (;;) { - i32 current = atomic_load(sync); - if (current && atomic_cas(sync, &current, 0)) { + i32 current = atomic_load_u32(sync); + if (current && atomic_cas_u32(sync, &current, 0)) { result = 1; break; } diff --git a/build.c b/build.c @@ -7,23 +7,33 @@ * to it with extern from the C source (bake both data and size) * - use objcopy, maybe need linker script maybe command line flags for ld will work * [ ]: cross compile/override baked compiler + * [ ]: msvc build doesn't detect out of date files correctly + * [ ]: seperate dwarf debug info */ +#include <stdarg.h> +#include <stdio.h> -#define COMMON_FLAGS "-std=c11", "-Wall", "-Iexternal/include" +#include "util.h" #define OUTDIR "out" #define OUTPUT(s) OUTDIR "/" s -#include "util.h" - -#include <stdarg.h> -#include <stdio.h> +#if COMPILER_MSVC + #define COMMON_FLAGS "-nologo", "-std:c11", "-Fo:" OUTDIR "\\", "-Z7", "-Zo" + #define DEBUG_FLAGS "-Od", "-D_DEBUG" + #define OPTIMIZED_FLAGS "-O2" +#else + #define COMMON_FLAGS "-std=c11", "-pipe", "-Wall" + #define DEBUG_FLAGS "-O0", "-D_DEBUG", "-Wno-unused-function" + #define OPTIMIZED_FLAGS "-O3" +#endif #define is_aarch64 ARCH_ARM64 #define is_amd64 ARCH_X64 #define is_unix OS_LINUX #define is_w32 OS_WINDOWS #define is_clang COMPILER_CLANG +#define is_msvc COMPILER_MSVC #if OS_LINUX @@ -34,14 +44,18 @@ #include "os_linux.c" - #define OS_SHARED_LIB(s) s ".so" + #define OS_SHARED_LINK_LIB(s) "lib" s ".so" + #define OS_SHARED_LIB(s) s 
".so" + #define OS_STATIC_LIB(s) s ".a" #define OS_MAIN "main_linux.c" #elif OS_WINDOWS #include "os_win32.c" - #define OS_SHARED_LIB(s) s ".dll" + #define OS_SHARED_LINK_LIB(s) s ".dll" + #define OS_SHARED_LIB(s) s ".dll" + #define OS_STATIC_LIB(s) s ".lib" #define OS_MAIN "main_w32.c" #else @@ -56,6 +70,24 @@ #define COMPILER "cc" #endif +#if COMPILER_MSVC + #define LINK_LIB(name) name ".lib" + #define OBJECT(name) name ".obj" + #define OUTPUT_DLL(name) "/LD", "/Fe:", name + #define OUTPUT_LIB(name) "/out:" name + #define OUTPUT_EXE(name) "/Fe:", name + #define SINGLE_OBJECT(in, out) "/c", (in), "/Fo:", (out) + #define STATIC_LIBRARY_BEGIN(name) "lib", "/nologo", name +#else + #define LINK_LIB(name) "-l" name + #define OBJECT(name) name ".o" + #define OUTPUT_DLL(name) "-fPIC", "-shared", "-o", name + #define OUTPUT_LIB(name) name + #define OUTPUT_EXE(name) "-o", name + #define SINGLE_OBJECT(in, out) "-c", (in), "-o", (out) + #define STATIC_LIBRARY_BEGIN(name) "ar", "rc", name +#endif + #define shift(list, count) ((count)--, *(list)++) #define da_append_count(a, s, items, item_count) do { \ @@ -325,21 +357,25 @@ cmd_base(Arena *a, Options *o) CommandList result = {0}; cmd_append(a, &result, COMPILER); - /* TODO(rnp): support cross compiling with clang */ - if (!o->generic) cmd_append(a, &result, "-march=native"); - else if (is_amd64) cmd_append(a, &result, "-march=x86-64-v3"); - else if (is_aarch64) cmd_append(a, &result, "-march=armv8"); - - cmd_append(a, &result, COMMON_FLAGS); + if (!is_msvc) { + /* TODO(rnp): support cross compiling with clang */ + if (!o->generic) cmd_append(a, &result, "-march=native"); + else if (is_amd64) cmd_append(a, &result, "-march=x86-64-v3"); + else if (is_aarch64) cmd_append(a, &result, "-march=armv8"); + } - if (o->debug) cmd_append(a, &result, "-O0", "-D_DEBUG", "-Wno-unused-function"); - else cmd_append(a, &result, "-O3"); + cmd_append(a, &result, COMMON_FLAGS, "-Iexternal/include"); + if (o->debug) cmd_append(a, &result, 
DEBUG_FLAGS); + else cmd_append(a, &result, OPTIMIZED_FLAGS); if (is_w32 && is_clang) cmd_append(a, &result, "-fms-extensions"); if (o->debug && is_unix) cmd_append(a, &result, "-ggdb"); - if (o->sanitize) cmd_append(a, &result, "-fsanitize=address,undefined"); + if (o->sanitize) { + if (!is_msvc) cmd_append(a, &result, "-fsanitize=address,undefined"); + else printf("warning: santizers not supported with this compiler\n"); + } if (o->report) { if (is_clang) cmd_append(a, &result, "-fproc-stat-report"); @@ -364,7 +400,10 @@ check_rebuild_self(Arena arena, i32 argc, char *argv[]) Options options = {0}; CommandList c = cmd_base(&arena, &options); - cmd_append(&arena, &c, "-Wno-unused-function", __FILE__, "-o", binary, (void *)0); + if (!is_msvc) cmd_append(&arena, &c, "-Wno-unused-function"); + cmd_append(&arena, &c, __FILE__, OUTPUT_EXE(binary)); + if (is_msvc) cmd_append(&arena, &c, "/link", "-incremental:no", "-opt:ref"); + cmd_append(&arena, &c, (void *)0); if (!run_synchronous(arena, &c)) { os_rename_file(old_name, binary); die("failed to rebuild self\n"); @@ -429,10 +468,16 @@ parse_options(i32 argc, char *argv[]) /* NOTE(rnp): produce pdbs on w32 */ function void -cmd_pdb(Arena *a, CommandList *cmd) +cmd_pdb(Arena *a, CommandList *cmd, char *name) { - if (is_w32 && is_clang) + if (is_w32 && is_clang) { cmd_append(a, cmd, "-fuse-ld=lld", "-g", "-gcodeview", "-Wl,--pdb="); + } else if (is_msvc) { + Stream sb = arena_stream(*a); + stream_append_s8s(&sb, s8("-PDB:"), c_str_to_s8(name), s8(".pdb")); + char *pdb = (char *)arena_stream_commit_zero(a, &sb).data; + cmd_append(a, cmd, "/link", "-incremental:no", "-opt:ref", "-DEBUG", pdb); + } } function void @@ -453,14 +498,15 @@ git_submodule_update(Arena a, char *name) } } -#define build_shared_library(a, cc, name, ...) 
build_shared_library_(a, cc, name, arg_list(char *, ##__VA_ARGS__)) function b32 -build_shared_library_(Arena a, CommandList cc, char *name, char **deps, iz deps_count) +build_shared_library(Arena a, CommandList cc, char *name, char *output, char **libs, iz libs_count, char **srcs, iz srcs_count) { b32 result = 0; - cmd_append(&a, &cc, "-fPIC", "-shared"); - cmd_append_count(&a, &cc, deps, deps_count); - cmd_append(&a, &cc, "-o", name, (void *)0); + cmd_append_count(&a, &cc, srcs, srcs_count); + cmd_append(&a, &cc, OUTPUT_DLL(output)); + cmd_pdb(&a, &cc, name); + cmd_append_count(&a, &cc, libs, libs_count); + cmd_append(&a, &cc, (void *)0); result = run_synchronous(a, &cc); return result; } @@ -472,13 +518,13 @@ build_static_library(Arena a, CommandList cc, char *name, char **deps, char **ou b32 result = 0; b32 all_success = 1; for (iz i = 0; i < count; i++) { - cmd_append(&a, &cc, "-c", deps[i], "-o", outputs[i], (void *)0); + cmd_append(&a, &cc, SINGLE_OBJECT(deps[i], outputs[i]), (void *)0); all_success &= run_synchronous(a, &cc); cc.count -= 5; } if (all_success) { CommandList ar = {0}; - cmd_append(&a, &ar, "ar", "rc", name); + cmd_append(&a, &ar, STATIC_LIBRARY_BEGIN(name)); cmd_append_count(&a, &ar, outputs, count); cmd_append(&a, &ar, (void *)0); result = run_synchronous(a, &ar); @@ -489,15 +535,14 @@ build_static_library(Arena a, CommandList cc, char *name, char **deps, char **ou function void check_build_raylib(Arena a, CommandList cc, b32 shared) { - char *libraylib = shared ? OS_SHARED_LIB("libraylib") : OUTPUT("libraylib.a"); - + char *libraylib = shared ? 
OS_SHARED_LINK_LIB("raylib") : OUTPUT_LIB(OUTPUT(OS_STATIC_LIB("raylib"))); if (needs_rebuild(libraylib, __FILE__, "external/include/rlgl.h", "external/raylib")) { git_submodule_update(a, "external/raylib"); os_copy_file("external/raylib/src/rlgl.h", "external/include/rlgl.h"); if (is_unix) cmd_append(&a, &cc, "-D_GLFW_X11"); cmd_append(&a, &cc, "-DPLATFORM_DESKTOP_GLFW", "-DGRAPHICS_API_OPENGL_43"); - cmd_append(&a, &cc, "-Wno-unused-but-set-variable"); + if (!is_msvc) cmd_append(&a, &cc, "-Wno-unused-but-set-variable"); cmd_append(&a, &cc, "-Iexternal/raylib/src", "-Iexternal/raylib/src/external/glfw/include"); #define RAYLIB_SOURCES \ X(rglfw) \ @@ -508,20 +553,16 @@ check_build_raylib(Arena a, CommandList cc, b32 shared) #define X(name) "external/raylib/src/" #name ".c", char *srcs[] = {"external/rcore_extended.c", RAYLIB_SOURCES}; #undef X - #define X(name) OUTPUT(#name ".o"), - char *outs[] = {OUTPUT("rcore_extended.o"), RAYLIB_SOURCES}; + #define X(name) OUTPUT(OBJECT(#name)), + char *outs[] = {OUTPUT(OBJECT("rcore_extended")), RAYLIB_SOURCES}; #undef X b32 success; if (shared) { + char *libs[] = {LINK_LIB("user32"), LINK_LIB("shell32"), LINK_LIB("gdi32"), LINK_LIB("winmm")}; + iz libs_count = is_w32 ? 
countof(libs) : 0; cmd_append(&a, &cc, "-DBUILD_LIBTYPE_SHARED", "-D_GLFW_BUILD_DLL"); - cmd_append(&a, &cc, "-fPIC", "-shared"); - cmd_pdb(&a, &cc); - cmd_append_count(&a, &cc, srcs, countof(srcs)); - cmd_append(&a, &cc, "-o", libraylib); - if (is_w32) cmd_append(&a, &cc, "-L.", "-lgdi32", "-lwinmm"); - cmd_append(&a, &cc, (void *)0); - success = run_synchronous(a, &cc); + success = build_shared_library(a, cc, "raylib", libraylib, libs, libs_count, srcs, countof(srcs)); } else { success = build_static_library(a, cc, libraylib, srcs, outs, countof(srcs)); } @@ -529,17 +570,6 @@ check_build_raylib(Arena a, CommandList cc, b32 shared) } } -/* NOTE(rnp): gcc requires these to appear at the end for no reason at all */ -function void -cmd_append_ldflags(Arena *a, CommandList *cc, b32 shared) -{ - cmd_pdb(a, cc); - cmd_append(a, cc, "-lm"); - if (shared && !is_w32) cmd_append(a, cc, "-Wl,-rpath,."); - if (shared) cmd_append(a, cc, "-L.", "-lraylib"); - if (is_w32) cmd_append(a, cc, "-lgdi32", "-lwinmm", "-lSynchronization"); -} - function b32 build_helper_library(Arena arena, CommandList cc) { @@ -547,13 +577,12 @@ build_helper_library(Arena arena, CommandList cc) // library char *library = OUTPUT(OS_SHARED_LIB("ogl_beamformer_lib")); char *srcs[] = {"helpers/ogl_beamformer_lib.c"}; + char *libs[] = {LINK_LIB("Synchronization")}; + iz libs_count = is_w32 ? 
countof(libs) : 0; - cmd_append(&arena, &cc, "-Wno-unused-function", "-fPIC", "-shared"); - cmd_append_count(&arena, &cc, srcs, countof(srcs)); - cmd_append(&arena, &cc, "-o", library); - if (is_w32) cmd_append(&arena, &cc, "-lSynchronization"); - cmd_append(&arena, &cc, (void *)0); - b32 result = run_synchronous(arena, &cc); + if (!is_msvc) cmd_append(&arena, &cc, "-Wno-unused-function"); + b32 result = build_shared_library(arena, cc, "ogl_beamformer_lib", library, + libs, libs_count, srcs, countof(srcs)); if (!result) fprintf(stderr, "failed to build: %s\n", library); ///////////// @@ -580,6 +609,20 @@ build_helper_library(Arena arena, CommandList cc) return result; } +function b32 +build_beamformer_as_library(Arena arena, CommandList cc) +{ + char *library = OS_SHARED_LIB("beamformer"); + char *srcs[] = {"beamformer.c"}; + char *libs[] = {!is_msvc? "-L." : "", LINK_LIB("raylib"), LINK_LIB("gdi32"), + LINK_LIB("shell32"), LINK_LIB("user32"), LINK_LIB("winmm"), LINK_LIB("Synchronization")}; + iz libs_count = is_w32 ? 
countof(libs) : 0; + b32 result = build_shared_library(arena, cc, "beamformer", library, + libs, libs_count, srcs, countof(srcs)); + if (!result) fprintf(stderr, "failed to build: %s\n", library); + return result; +} + i32 main(i32 argc, char *argv[]) { @@ -587,6 +630,8 @@ main(i32 argc, char *argv[]) check_rebuild_self(arena, argc, argv); Options options = parse_options(argc, argv); + if (options.debug && is_msvc) + die_("Debug build is not supported with MSVC\n"); os_make_directory(OUTDIR); @@ -597,19 +642,24 @@ main(i32 argc, char *argv[]) ///////////////////////// // hot reloadable portion - if (options.debug) { - iz c_count = c.count; - if (is_w32) cmd_append_ldflags(&arena, &c, 1); - if (!build_shared_library(arena, c, OS_SHARED_LIB("beamformer"), "beamformer.c")) - die("failed to build: " OS_SHARED_LIB("beamfomer") "\n"); - c.count = c_count; - } + if (options.debug) build_beamformer_as_library(arena, c); ////////////////// // static portion - cmd_append(&arena, &c, OS_MAIN, "-o", "ogl"); - if (!options.debug) cmd_append(&arena, &c, OUTPUT("libraylib.a")); - cmd_append_ldflags(&arena, &c, options.debug); + cmd_append(&arena, &c, OS_MAIN, OUTPUT_EXE("ogl")); + cmd_pdb(&arena, &c, "ogl"); + if (!is_msvc) cmd_append(&arena, &c, "-lm"); + if (options.debug) { + if (!is_w32) cmd_append(&arena, &c, "-Wl,-rpath,."); + if (!is_msvc) cmd_append(&arena, &c, "-L."); + cmd_append(&arena, &c, LINK_LIB("raylib")); + } else { + cmd_append(&arena, &c, OUTPUT(OS_STATIC_LIB("raylib"))); + } + if (is_w32) { + cmd_append(&arena, &c, LINK_LIB("user32"), LINK_LIB("shell32"), LINK_LIB("gdi32"), + LINK_LIB("winmm"), LINK_LIB("Synchronization")); + } cmd_append(&arena, &c, (void *)0); return !run_synchronous(arena, &c); diff --git a/compiler.h b/compiler.h @@ -10,11 +10,11 @@ #error Unsupported Operating System #endif -#ifdef __clang__ +#if defined(__clang__) #define COMPILER_CLANG 1 -#elif _MSC_VER +#elif defined(_MSC_VER) #define COMPILER_MSVC 1 -#elif __GNUC__ +#elif 
defined(__GNUC__) #define COMPILER_GCC 1 #else #error Unsupported Compiler diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c @@ -250,9 +250,9 @@ beamformer_start_compute(u32 image_plane_tag) b32 result = 0; if (image_plane_tag < IPT_LAST) { if (check_shared_memory()) { - if (atomic_load(&g_bp->dispatch_compute_sync) == 0) { + if (atomic_load_u32(&g_bp->dispatch_compute_sync) == 0) { g_bp->current_image_plane = image_plane_tag; - atomic_store(&g_bp->dispatch_compute_sync, 1); + atomic_store_u32(&g_bp->dispatch_compute_sync, 1); result = 1; } else { g_lib_last_error = BF_LIB_ERR_KIND_SYNC_VARIABLE; @@ -398,7 +398,7 @@ send_data(void *data, u32 data_size) if (result) { /* TODO(rnp): should we just set timeout on acquiring the lock instead of this? */ try_wait_sync(&g_bp->raw_data_sync, -1, os_wait_on_value); - atomic_store(&g_bp->raw_data_sync, 1); + atomic_store_u32(&g_bp->raw_data_sync, 1); } } return result; diff --git a/intrinsics.c b/intrinsics.c @@ -26,13 +26,17 @@ #define debugbreak() __debugbreak() #define unreachable() __assume(0) - #define atomic_store(ptr, n) __atomic_store_n(ptr, n, __ATOMIC_RELEASE) - #define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) - #define atomic_swap(ptr, n) __atomic_exchange_n(ptr, n, __ATOMIC_RELEASE) - #define atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE) - #define atomic_add(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_RELEASE) - #define atomic_inc_u32(ptr, n) _InterlockedAdd((volatile u32 *)ptr, n) - #define atomic_cas(ptr, cptr, n) __atomic_compare_exchange_n(ptr, cptr, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + #define atomic_store_u32(ptr, n) *((volatile u32 *)(ptr)) = (n) + #define atomic_load_u64(ptr) *((volatile u64 *)(ptr)) + #define atomic_load_u32(ptr) *((volatile u32 *)(ptr)) + #define atomic_and_u64(ptr, n) _InterlockedAnd64((volatile u64 *)(ptr), (n)) + #define atomic_add_u64(ptr, n) _InterlockedAdd64((volatile u64 *)(ptr), (n)) + #define atomic_add_u32(ptr, n) 
_InterlockedAdd((volatile u32 *)(ptr), (n)) + #define atomic_cas_u64(ptr, cptr, n) (_InterlockedCompareExchange64((volatile u64 *)(ptr), *(cptr), (n)) == *(cptr)) + #define atomic_cas_u32(ptr, cptr, n) (_InterlockedCompareExchange((volatile u32 *)(ptr), *(cptr), (n)) == *(cptr)) + + #define sqrt_f32(a) sqrtf(a) + #define atan2_f32(y, x) atan2f(y, x) #else #define align_as(n) __attribute__((aligned(n))) @@ -47,19 +51,45 @@ #endif #define unreachable() __builtin_unreachable() - #define atomic_store(ptr, n) __atomic_store_n(ptr, n, __ATOMIC_RELEASE) - #define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) - #define atomic_swap(ptr, n) __atomic_exchange_n(ptr, n, __ATOMIC_RELEASE) - #define atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE) - #define atomic_add(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_RELEASE) - #define atomic_inc_u32(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_ACQ_REL) - #define atomic_cas(ptr, cptr, n) __atomic_compare_exchange_n(ptr, cptr, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + #define atomic_store_u32(ptr, n) __atomic_store_n(ptr, n, __ATOMIC_RELEASE) + #define atomic_load_u64(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) + #define atomic_and_u64(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE) + #define atomic_add_u64(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_ACQ_REL) + #define atomic_cas_u64(ptr, cptr, n) __atomic_compare_exchange_n(ptr, cptr, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + #define atomic_add_u32 atomic_add_u64 + #define atomic_cas_u32 atomic_cas_u64 + #define atomic_load_u32 atomic_load_u64 + + #define sqrt_f32(a) __builtin_sqrtf(a) + #define atan2_f32(y, x) __builtin_atan2f(y, x) #endif -/* TODO(rnp): msvc probably won't build this but there are other things preventing that as well */ -#define sqrt_f32(a) __builtin_sqrtf(a) -#define atan2_f32(y, x) __builtin_atan2f(y, x) +#if COMPILER_MSVC + +function force_inline u32 +clz_u32(u32 a) +{ + u32 result = 32, index; + if (a) { + _BitScanReverse(&index, 
a); + result = index; + } + return result; +} + +function force_inline u32 +ctz_u32(u32 a) +{ + u32 result = 32, index; + if (a) { + _BitScanForward(&index, a); + result = index; + } + return result; +} + +#else /* !COMPILER_MSVC */ function force_inline u32 clz_u32(u32 a) @@ -77,6 +107,8 @@ ctz_u32(u32 a) return result; } +#endif + #if ARCH_ARM64 /* NOTE(rnp): we are only doing a handful of f32x4 operations so we will just use NEON and do * the macro renaming thing. If you are implementing a serious wide vector operation you should diff --git a/os_linux.c b/os_linux.c @@ -275,7 +275,7 @@ function OS_WAIT_ON_VALUE_FN(os_wait_on_value) function OS_WAKE_WAITERS_FN(os_wake_waiters) { if (sync) { - atomic_inc_u32(sync, 1); + atomic_store_u32(sync, 1); syscall(SYS_futex, sync, FUTEX_WAKE, I32_MAX, 0, 0, 0); } } diff --git a/os_win32.c b/os_win32.c @@ -364,7 +364,7 @@ function OS_WAIT_ON_VALUE_FN(os_wait_on_value) function OS_WAKE_WAITERS_FN(os_wake_waiters) { if (sync) { - atomic_inc_u32(sync, 1); + atomic_add_u32(sync, 1); WakeByAddressAll(sync); } } diff --git a/static.c b/static.c @@ -26,7 +26,7 @@ function FILE_WATCH_CALLBACK_FN(debug_reload) /* NOTE(rnp): spin until compute thread finishes its work (we will probably * never reload while compute is in progress but just incase). 
*/ - while (!atomic_load(&os->compute_worker.asleep)); + while (!atomic_load_u32(&os->compute_worker.asleep)); os_unload_library(debug_lib); debug_lib = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, &err); @@ -242,12 +242,12 @@ function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point) for (;;) { for (;;) { - i32 current = atomic_load(&ctx->sync_variable); - if (current && atomic_swap(&ctx->sync_variable, 0) == current) + i32 expected = 1; + if (atomic_cas_u32(&ctx->sync_variable, &expected, 0)) break; ctx->asleep = 1; - os_wait_on_value(&ctx->sync_variable, current, -1); + os_wait_on_value(&ctx->sync_variable, 0, -1); ctx->asleep = 0; } beamformer_complete_compute(ctx->user_context, ctx->arena, ctx->gl_context); @@ -328,7 +328,7 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory) #endif #define X(name, type, size, gltype, glsize, comment) "\t" #gltype " " #name #glsize "; " comment "\n" - read_only local_persist s8 compute_parameters_header = s8("" + read_only local_persist s8 compute_parameters_header = s8_comp("" "layout(std140, binding = 0) uniform parameters {\n" BEAMFORMER_PARAMS_HEAD BEAMFORMER_UI_PARAMS diff --git a/ui.c b/ui.c @@ -948,14 +948,14 @@ add_beamformer_parameters_view(Variable *parent, BeamformerCtx *ctx) &bp->off_axis_pos, (v2){.x = -1e3, .y = 1e3}, 0.25e3, 0.5e-3, V_INPUT|V_TEXT|V_CAUSES_COMPUTE, ui->font); - local_persist s8 beamform_plane_labels[] = {s8("XZ"), s8("YZ")}; + read_only local_persist s8 beamform_plane_labels[] = {s8_comp("XZ"), s8_comp("YZ")}; add_variable_cycler(ui, group, &ui->arena, V_CAUSES_COMPUTE, ui->font, s8("Beamform Plane:"), (u32 *)&bp->beamform_plane, beamform_plane_labels, countof(beamform_plane_labels)); add_beamformer_variable_f32(ui, group, &ui->arena, s8("F#:"), s8(""), &bp->f_number, (v2){.y = 1e3}, 1, 0.1, V_INPUT|V_TEXT|V_CAUSES_COMPUTE, ui->font); - local_persist s8 true_false_labels[] = {s8("False"), s8("True")}; + read_only local_persist s8 true_false_labels[] = {s8_comp("False"), 
s8_comp("True")}; add_variable_cycler(ui, group, &ui->arena, V_CAUSES_COMPUTE, ui->font, s8("Interpolate:"), &bp->interpolate, true_false_labels, countof(true_false_labels)); @@ -1017,8 +1017,8 @@ add_beamformer_frame_view(BeamformerUI *ui, Variable *parent, Arena *arena, switch (type) { case FVT_LATEST: { - #define X(_type, _id, pretty) s8(pretty), - local_persist s8 labels[] = { IMAGE_PLANE_TAGS s8("Any") }; + #define X(_type, _id, pretty) s8_comp(pretty), + read_only local_persist s8 labels[] = {IMAGE_PLANE_TAGS s8_comp("Any")}; #undef X bv->cycler = add_variable_cycler(ui, menu, arena, 0, ui->small_font, s8("Live: "), &bv->cycler_state, labels, countof(labels)); @@ -1255,8 +1255,8 @@ lerp_v4(v4 a, v4 b, f32 t) function s8 push_das_shader_kind(Stream *s, DASShaderKind shader, u32 transmit_count) { - #define X(type, id, pretty, fixed_tx) s8(pretty), - read_only local_persist s8 pretty_names[DASShaderKind_Count + 1] = {DAS_TYPES s8("Invalid")}; + #define X(type, id, pretty, fixed_tx) s8_comp(pretty), + read_only local_persist s8 pretty_names[DASShaderKind_Count + 1] = {DAS_TYPES s8_comp("Invalid")}; #undef X #define X(type, id, pretty, fixed_tx) fixed_tx, read_only local_persist u8 fixed_transmits[DASShaderKind_Count + 1] = {DAS_TYPES 0}; @@ -1292,8 +1292,8 @@ push_custom_view_title(Stream *s, Variable *var) switch (bv->type) { case FVT_COPY: stream_append_s8(s, s8(": Copy [")); break; case FVT_LATEST: { - #define X(plane, id, pretty) s8(": " pretty " ["), - local_persist s8 labels[IPT_LAST + 1] = { IMAGE_PLANE_TAGS s8(": Live [") }; + #define X(plane, id, pretty) s8_comp(": " pretty " ["), + read_only local_persist s8 labels[IPT_LAST + 1] = {IMAGE_PLANE_TAGS s8_comp(": Live [")}; #undef X stream_append_s8(s, labels[*bv->cycler->u.cycler.state % (IPT_LAST + 1)]); } break; @@ -1961,8 +1961,8 @@ draw_compute_progress_bar(BeamformerUI *ui, Arena arena, ComputeProgressBar *sta function v2 draw_compute_stats_view(BeamformerCtx *ctx, Arena arena, ComputeShaderStats 
*stats, Rect r) { - #define X(e, n, s, h, pn) [ComputeShaderKind_##e] = s8(pn ":"), - read_only local_persist s8 labels[ComputeShaderKind_Count] = { COMPUTE_SHADERS }; + #define X(e, n, s, h, pn) [ComputeShaderKind_##e] = s8_comp(pn ":"), + read_only local_persist s8 labels[ComputeShaderKind_Count] = {COMPUTE_SHADERS}; #undef X BeamformerUI *ui = ctx->ui; diff --git a/util.h b/util.h @@ -5,6 +5,8 @@ #include <stddef.h> #include <stdint.h> +#include "compiler.h" + #ifndef asm #define asm __asm__ #endif @@ -14,19 +16,19 @@ #endif #ifdef _DEBUG - #if OS_WINDOWS - #define DEBUG_EXPORT __declspec(dllexport) - #else - #define DEBUG_EXPORT - #endif - #define DEBUG_DECL(a) a - #define ASSERT(c) do { if (!(c)) debugbreak(); } while (0) + #if OS_WINDOWS + #define DEBUG_EXPORT __declspec(dllexport) + #else + #define DEBUG_EXPORT + #endif + #define DEBUG_DECL(a) a + #define assert(c) do { if (!(c)) debugbreak(); } while (0) #else - #define DEBUG_EXPORT function - #define DEBUG_DECL(a) - #define ASSERT(c) + #define DEBUG_EXPORT function + #define DEBUG_DECL(a) + #define assert(c) #endif -#define assert ASSERT +#define ASSERT assert #define INVALID_CODE_PATH ASSERT(0) #define INVALID_DEFAULT_CASE default: ASSERT(0); break @@ -81,7 +83,7 @@ #define I32_MAX (0x7FFFFFFFL) #define U32_MAX (0xFFFFFFFFUL) -#define F32_INFINITY (__builtin_inff()) +#define F32_INFINITY (1e+300*1e+300) typedef char c8; typedef uint8_t u8; @@ -106,6 +108,7 @@ typedef struct { Arena *arena; u8 *old_beg; } TempArena; typedef struct { iz len; u8 *data; } s8; #define s8(s) (s8){.len = ARRAY_COUNT(s) - 1, .data = (u8 *)s} +#define s8_comp(s) {sizeof(s) - 1, (u8 *)s} typedef struct { iz len; u16 *data; } s16;