ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 7ad8da41d47eed3f32ecc8724a465991374cbf45
Parent: 8bec7c3e173b637423b2d06c0251cf90fc1536c6
Author: Randy Palamar
Date:   Mon,  2 Jun 2025 20:09:12 -0600

build: make build.c compile with msvc

Diffstat:
M beamformer.c | 4 ++--
M build.c | 2 +-
M intrinsics.c | 52 +++++++++++++++++++++++++++++++++++++++-------------
M os_linux.c | 2 +-
M os_win32.c | 11 ++++++-----
M util.h | 16 ++++------------
6 files changed, 53 insertions(+), 34 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -158,7 +158,7 @@ fill_frame_compute_work(BeamformerCtx *ctx, BeamformWork *work, ImagePlaneTag pl b32 result = 0; if (work) { result = 1; - u32 frame_id = atomic_inc(&ctx->next_render_frame_index, 1); + u32 frame_id = atomic_inc_u32(&ctx->next_render_frame_index, 1); u32 frame_index = frame_id % ARRAY_COUNT(ctx->beamform_frames); work->type = BW_COMPUTE; work->frame = ctx->beamform_frames + frame_index; @@ -623,7 +623,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co /* TODO(rnp): not really sure what to do here */ mem_copy(&ctx->averaged_frames[aframe_index].stats.times, &frame->stats.times, sizeof(frame->stats.times)); - atomic_inc(&ctx->averaged_frame_index, 1); + atomic_inc_u32(&ctx->averaged_frame_index, 1); } frame->ready_to_present = 1; cs->processing_compute = 0; diff --git a/build.c b/build.c @@ -82,7 +82,7 @@ typedef struct { } Options; #define die(fmt, ...) die_("%s: " fmt, __FUNCTION__, ##__VA_ARGS__) -function void __attribute__((noreturn)) +function no_return void die_(char *format, ...) 
{ va_list ap; diff --git a/intrinsics.c b/intrinsics.c @@ -18,18 +18,49 @@ #define read_only #endif +#if COMPILER_MSVC + #define align_as(n) __declspec(align(n)) + #define pack_struct(s) __pragma(pack(push, 1)) s __pragma(pack(pop)) + #define no_return __declspec(noreturn) + + #define debugbreak() __debugbreak() + #define unreachable() __assume(0) + + #define atomic_store(ptr, n) __atomic_store_n(ptr, n, __ATOMIC_RELEASE) + #define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) + #define atomic_swap(ptr, n) __atomic_exchange_n(ptr, n, __ATOMIC_RELEASE) + #define atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE) + #define atomic_add(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_RELEASE) + #define atomic_inc_u32(ptr, n) _InterlockedAdd((volatile u32 *)ptr, n) + #define atomic_cas(ptr, cptr, n) __atomic_compare_exchange_n(ptr, cptr, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#else + #define align_as(n) __attribute__((aligned(n))) + #define pack_struct(s) s __attribute__((packed)) + #define no_return __attribute__((noreturn)) + + #if ARCH_ARM64 + /* TODO? 
debuggers just loop here forever and need a manual PC increment (step over) */ + #define debugbreak() asm volatile ("brk 0xf000") + #else + #define debugbreak() asm volatile ("int3; nop") + #endif + #define unreachable() __builtin_unreachable() + + #define atomic_store(ptr, n) __atomic_store_n(ptr, n, __ATOMIC_RELEASE) + #define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) + #define atomic_swap(ptr, n) __atomic_exchange_n(ptr, n, __ATOMIC_RELEASE) + #define atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE) + #define atomic_add(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_RELEASE) + #define atomic_inc_u32(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_ACQ_REL) + #define atomic_cas(ptr, cptr, n) __atomic_compare_exchange_n(ptr, cptr, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#endif + /* TODO(rnp): msvc probably won't build this but there are other things preventing that as well */ #define sqrt_f32(a) __builtin_sqrtf(a) #define atan2_f32(y, x) __builtin_atan2f(y, x) -#define atomic_store(ptr, n) __atomic_store_n(ptr, n, __ATOMIC_RELEASE) -#define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) -#define atomic_swap(ptr, n) __atomic_exchange_n(ptr, n, __ATOMIC_RELEASE) -#define atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE) -#define atomic_add(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_RELEASE) -#define atomic_inc(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_ACQ_REL) -#define atomic_cas(ptr, cptr, n) __atomic_compare_exchange_n(ptr, cptr, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) - function force_inline u32 clz_u32(u32 a) { @@ -47,9 +78,6 @@ ctz_u32(u32 a) } #if ARCH_ARM64 -/* TODO? debuggers just loop here forever and need a manual PC increment (step over) */ -#define debugbreak() asm volatile ("brk 0xf000") - /* NOTE(rnp): we are only doing a handful of f32x4 operations so we will just use NEON and do * the macro renaming thing. If you are implementing a serious wide vector operation you should * use SVE(2) instead. 
The semantics are different however and the code will be written for an @@ -87,6 +115,4 @@ typedef __m128i i32x4; #define store_f32x4(a, o) _mm_storeu_ps(o, a) #define store_i32x4(a, o) _mm_storeu_si128((i32x4 *)o, a) -#define debugbreak() asm volatile ("int3; nop") - #endif diff --git a/os_linux.c b/os_linux.c @@ -275,7 +275,7 @@ function OS_WAIT_ON_VALUE_FN(os_wait_on_value) function OS_WAKE_WAITERS_FN(os_wake_waiters) { if (sync) { - atomic_inc(sync, 1); + atomic_inc_u32(sync, 1); syscall(SYS_futex, sync, FUTEX_WAKE, I32_MAX, 0, 0, 0); } } diff --git a/os_win32.c b/os_win32.c @@ -38,7 +38,8 @@ * incorrectly. They worked around it be making the ft* members a struct {u32, u32} which * is aligned on a 4-byte boundary. Then in their documentation they explicitly tell you not * to cast to u64 because "it can cause alignment faults on 64-bit Windows" - go figure */ -typedef struct __attribute__((packed)) { +typedef struct w32_file_info w32_file_info; +pack_struct(struct w32_file_info { u32 dwFileAttributes; u64 ftCreationTime; u64 ftLastAccessTime; @@ -49,7 +50,7 @@ typedef struct __attribute__((packed)) { u32 nNumberOfLinks; u32 nFileIndexHigh; u32 nFileIndexLow; -} w32_file_info; +}); typedef struct { u32 next_entry_offset; @@ -134,14 +135,14 @@ function OS_WRITE_FILE_FN(os_write_file) return raw.len == wlen; } -function void __attribute__((noreturn)) +function no_return void os_exit(i32 code) { ExitProcess(1); unreachable(); } -function void __attribute__((noreturn)) +function no_return void os_fatal(s8 msg) { os_write_file(GetStdHandle(STD_ERROR_HANDLE), msg); @@ -363,7 +364,7 @@ function OS_WAIT_ON_VALUE_FN(os_wait_on_value) function OS_WAKE_WAITERS_FN(os_wake_waiters) { if (sync) { - atomic_inc(sync, 1); + atomic_inc_u32(sync, 1); WakeByAddressAll(sync); } } diff --git a/util.h b/util.h @@ -13,16 +13,8 @@ #define typeof __typeof__ #endif -#ifndef unreachable -#ifdef _MSC_VER - #define unreachable() __assume(0) -#else - #define unreachable() 
__builtin_unreachable() -#endif -#endif - #ifdef _DEBUG - #ifdef _WIN32 + #if OS_WINDOWS #define DEBUG_EXPORT __declspec(dllexport) #else #define DEBUG_EXPORT @@ -306,7 +298,7 @@ typedef RENDERDOC_START_FRAME_CAPTURE_FN(renderdoc_start_frame_capture_fn); #define RENDERDOC_END_FRAME_CAPTURE_FN(name) b32 name(iptr gl_context, iptr window_handle) typedef RENDERDOC_END_FRAME_CAPTURE_FN(renderdoc_end_frame_capture_fn); -typedef __attribute__((aligned(16))) u8 RenderDocAPI[216]; +typedef align_as(16) u8 RenderDocAPI[216]; #define RENDERDOC_API_FN_ADDR(a, offset) (*(iptr *)((*a) + offset)) #define RENDERDOC_START_FRAME_CAPTURE(a) (renderdoc_start_frame_capture_fn *)RENDERDOC_API_FN_ADDR(a, 152) #define RENDERDOC_END_FRAME_CAPTURE(a) (renderdoc_end_frame_capture_fn *) RENDERDOC_API_FN_ADDR(a, 168) @@ -322,8 +314,8 @@ struct OS { char *export_pipe_name; - DEBUG_DECL(renderdoc_start_frame_capture_fn *start_frame_capture); - DEBUG_DECL(renderdoc_end_frame_capture_fn *end_frame_capture); + DEBUG_DECL(renderdoc_start_frame_capture_fn *start_frame_capture;) + DEBUG_DECL(renderdoc_end_frame_capture_fn *end_frame_capture;) }; #define LABEL_GL_OBJECT(type, id, s) {s8 _s = (s); glObjectLabel(type, id, _s.len, (c8 *)_s.data);}