ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 29a0c74a146dbe0c0fc56df3efa7fb674f6cf7f6
Parent: dd807eeaeef43ab246bdaf8042676b1366d88ecb
Author: Randy Palamar
Date:   Sun, 11 Jan 2026 06:55:48 -0700

core: finish isolation of os functions

beamformer code can no longer see any os functions besides the
ones listed in beamformer.h

Diffstat:
M beamformer.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
M beamformer.h | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M beamformer_core.c | 84 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M beamformer_internal.h | 40 +++++++++++++++++++++++++++++++++-------
M beamformer_shared_memory.c | 46 ++++++++++++++++++++++++++++++++++++----------
M build.c | 24 +++++++++++++++++-------
M compiler.h | 9 +++++++++
M lib/ogl_beamformer_lib.c | 120 ++++++++++++++++++++++++++++++++-----------------------------------------
M main_linux.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
M main_w32.c | 275 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M os_linux.c | 247 ++++++++++---------------------------------------------------------------------
M os_win32.c | 300 ++++++++++---------------------------------------------------------------------
M tests/throughput.c | 23 ++++-------------------
M threads.c | 4 ++--
M ui.c | 33 ++++++++++++++-------------------
M util.h | 84 ++++++++++++++++++++-----------------------------------------------------------
M util_gl.c | 4 ++--
A util_os.c | 26 ++++++++++++++++++++++++++
18 files changed, 898 insertions(+), 903 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -1,15 +1,17 @@ /* See LICENSE for license details. */ +#include "beamformer_internal.h" + /* NOTE(rnp): magic variables to force discrete GPU usage on laptops with multiple devices */ EXPORT i32 NvOptimusEnablement = 1; EXPORT i32 AmdPowerXpressRequestHighPerformance = 1; -#include "beamformer_internal.h" - #if !BEAMFORMER_DEBUG #include "beamformer_core.c" #else +typedef void beamformer_frame_step_fn(BeamformerInput *); + #define BEAMFORMER_DEBUG_ENTRY_POINTS \ X(beamformer_debug_ui_deinit) \ X(beamformer_complete_compute) \ @@ -21,10 +23,9 @@ BEAMFORMER_DEBUG_ENTRY_POINTS #undef X BEAMFORMER_EXPORT void -beamformer_debug_hot_reload(BeamformerLibraryHandle library, BeamformerInput *input) +beamformer_debug_hot_reload(OSLibrary library, BeamformerInput *input) { BeamformerCtx *ctx = BeamformerContextMemory(input->memory); - Stream err = ctx->error_stream; // TODO(rnp): this will deadlock if live imaging is active /* NOTE(rnp): spin until compute thread finishes its work (we will probably @@ -32,29 +33,60 @@ beamformer_debug_hot_reload(BeamformerLibraryHandle library, BeamformerInput *in spin_wait(atomic_load_u32(&ctx->upload_worker.awake)); spin_wait(atomic_load_u32(&ctx->compute_worker.awake)); - #define X(name) name = os_lookup_symbol(library, #name, &err); + #define X(name) name = os_lookup_symbol(library, #name); BEAMFORMER_DEBUG_ENTRY_POINTS #undef X - stream_append_s8(&err, s8("reloaded main executable\n")); - os_write_file(os_error_handle(), stream_to_s8(&err)); + s8 info = beamformer_info("reloaded main executable"); + os_console_log(info.data, info.len); } #endif /* BEAMFORMER_DEBUG */ +function no_return void +fatal(s8 message) +{ + os_fatal(message.data, message.len); + unreachable(); +} + +// TODO(rnp): none of this belongs here, but will be removed +// once vulkan migration is complete +#define GLFW_VISIBLE 0x00020004 +void glfwWindowHint(i32, i32); +iptr glfwCreateWindow(i32, i32, char *, iptr, iptr); +void 
glfwMakeContextCurrent(iptr); +iptr glfwGetGLXContext(iptr); +iptr glfwGetWGLContext(iptr); +void * glfwGetProcAddress(char *); + +#if OS_WINDOWS +function iptr +os_get_native_gl_context(iptr window) +{ + return glfwGetWGLContext(window); +} +#else +function iptr +os_get_native_gl_context(iptr window) +{ + return glfwGetGLXContext(window); +} +#endif + function void gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, const void *userctx) { Stream *e = (Stream *)userctx; stream_append_s8s(e, s8("[OpenGL] "), (s8){.len = len, .data = (u8 *)msg}, s8("\n")); - os_write_file(os_error_handle(), stream_to_s8(e)); + os_console_log(e->data, e->widx); stream_reset(e, 0); } function void load_gl(Stream *err) { - #define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name); + #define X(name, ret, params) name = (name##_fn *)glfwGetProcAddress(#name); OGLProcedureList #undef X @@ -63,7 +95,7 @@ load_gl(Stream *err) char *vendor = (char *)glGetString(GL_VENDOR); if (!vendor) { stream_append_s8(err, s8("Failed to determine GL Vendor\n")); - os_fatal(stream_to_s8(err)); + fatal(stream_to_s8(err)); } /* TODO(rnp): str prefix of */ switch (vendor[0]) { @@ -76,7 +108,7 @@ load_gl(Stream *err) case 'M': gl_parameters.vendor_id = GLVendor_ARM; break; default: stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n")); - os_fatal(stream_to_s8(err)); + fatal(stream_to_s8(err)); } #define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl_parameters.name); @@ -111,7 +143,7 @@ load_gl(Stream *err) GL_PARAMETERS #undef X stream_append_s8(err, s8("-----------------------\n")); - os_write_file(os_error_handle(), stream_to_s8(err)); + os_console_log(err->data, err->widx); } #endif @@ -127,26 +159,26 @@ load_gl(Stream *err) OGLProcedureList #undef X - if (err->widx) os_fatal(stream_to_s8(err)); + if (err->widx) fatal(stream_to_s8(err)); } } function void -beamformer_load_cuda_library(BeamformerCtx *ctx, BeamformerLibraryHandle 
cuda, Arena arena) +beamformer_load_cuda_library(BeamformerCtx *ctx, OSLibrary cuda, Arena arena) { /* TODO(rnp): (25.10.30) registering the rf buffer with CUDA is currently * causing a major performance regression. for now we are disabling its use * altogether. it will be reenabled once the issue can be fixed */ - b32 result = 0 && gl_parameters.vendor_id == GLVendor_NVIDIA && cuda.value[0] != BeamformerInvalidHandle.value[0]; + b32 result = 0 && gl_parameters.vendor_id == GLVendor_NVIDIA && ValidHandle(cuda); if (result) { Stream err = arena_stream(arena); stream_append_s8(&err, beamformer_info("loading CUDA library functions")); - #define X(name, symname) cuda_## name = os_lookup_symbol(cuda, symname, &err); + #define X(name, symname) cuda_## name = os_lookup_symbol(cuda, symname); CUDALibraryProcedureList #undef X - os_write_file(os_error_handle(), stream_to_s8(&err)); + os_console_log(err.data, err.widx); } #define X(name, symname) if (!cuda_## name) cuda_## name = cuda_ ## name ## _stub; @@ -189,11 +221,6 @@ render_model_from_arrays(f32 *vertices, f32 *normals, i32 vertices_size, u16 *in return result; } -#define GLFW_VISIBLE 0x00020004 -void glfwWindowHint(i32, i32); -iptr glfwCreateWindow(i32, i32, char *, iptr, iptr); -void glfwMakeContextCurrent(iptr); - function void worker_thread_sleep(GLWorkerThreadContext *ctx, BeamformerSharedMemory *sm) { @@ -207,14 +234,14 @@ worker_thread_sleep(GLWorkerThreadContext *ctx, BeamformerSharedMemory *sm) /* TODO(rnp): clean this crap up; we shouldn't need two values to communicate this */ atomic_store_u32(&ctx->awake, 0); - os_wait_on_value(&ctx->sync_variable, 1, (u32)-1); + os_wait_on_address(&ctx->sync_variable, 1, (u32)-1); atomic_store_u32(&ctx->awake, 1); } } function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point) { - GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx; + GLWorkerThreadContext *ctx = user_context; glfwMakeContextCurrent(ctx->window_handle); ctx->gl_context = 
os_get_native_gl_context(ctx->window_handle); @@ -224,7 +251,7 @@ function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point) beamformer->compute_context.shader_timer_ids); for (;;) { - worker_thread_sleep(ctx, beamformer->shared_memory.region); + worker_thread_sleep(ctx, beamformer->shared_memory); asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg); beamformer_complete_compute(ctx->user_context, &ctx->arena, ctx->gl_context); } @@ -236,7 +263,7 @@ function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point) function OS_THREAD_ENTRY_POINT_FN(beamformer_upload_entry_point) { - GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx; + GLWorkerThreadContext *ctx = user_context; glfwMakeContextCurrent(ctx->window_handle); ctx->gl_context = os_get_native_gl_context(ctx->window_handle); @@ -246,7 +273,7 @@ function OS_THREAD_ENTRY_POINT_FN(beamformer_upload_entry_point) glQueryCounter(up->rf_buffer->data_timestamp_query, GL_TIMESTAMP); for (;;) { - worker_thread_sleep(ctx, up->shared_memory->region); + worker_thread_sleep(ctx, up->shared_memory); beamformer_rf_upload(up); } @@ -292,17 +319,38 @@ beamformer_init(BeamformerInput *input) ctx->compute_shader_stats = push_struct(&memory, ComputeShaderStats); ctx->compute_timing_table = push_struct(&memory, ComputeTimingTable); + ctx->shared_memory = input->shared_memory; + if (!ctx->shared_memory) fatal(s8("Get more ram lol\n")); + zero_struct(ctx->shared_memory); + + ctx->shared_memory->version = BEAMFORMER_SHARED_MEMORY_VERSION; + ctx->shared_memory->reserved_parameter_blocks = 1; + /* TODO(rnp): I'm not sure if its a good idea to pre-reserve a bunch of semaphores * on w32 but thats what we are doing for now */ - u32 lock_count = (u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots; - ctx->shared_memory = os_create_shared_memory_area(&memory, OS_SHARED_MEMORY_NAME, lock_count, - BEAMFORMER_SHARED_MEMORY_SIZE); - BeamformerSharedMemory *sm = 
ctx->shared_memory.region; - if (!sm) os_fatal(s8("Get more ram lol\n")); - mem_clear(sm, 0, sizeof(*sm)); - - sm->version = BEAMFORMER_SHARED_MEMORY_VERSION; - sm->reserved_parameter_blocks = 1; + #if OS_WINDOWS + { + Stream sb = arena_stream(memory); + stream_append(&sb, input->shared_memory_name, input->shared_memory_name_length); + stream_append_s8(&sb, s8("_lock_")); + i32 start_index = sb.widx; + for EachElement(os_w32_shared_memory_semaphores, it) { + stream_reset(&sb, start_index); + stream_append_u64(&sb, it); + stream_append_byte(&sb, 0); + os_w32_shared_memory_semaphores[it] = os_w32_create_semaphore((c8 *)sb.data, 1, 1); + if InvalidHandle(os_w32_shared_memory_semaphores[it]) + fatal(beamformer_info("init: failed to create w32 shared memory semaphore\n")); + + /* NOTE(rnp): hacky garbage because CreateSemaphore will just open an existing + * semaphore without any indication. Sometimes the other side of the shared memory + * will provide incorrect parameters or will otherwise fail and its faster to + * restart this program than to get that application to release the semaphores */ + /* TODO(rnp): figure out something more robust */ + os_w32_semaphore_release(os_w32_shared_memory_semaphores[it], 1); + } + } + #endif BeamformerComputeContext *cs = &ctx->compute_context; @@ -310,19 +358,17 @@ beamformer_init(BeamformerInput *input) /* TODO(rnp): we should lock this down after we have something working */ worker->user_context = (iptr)ctx; worker->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle); - worker->handle = os_create_thread((iptr)worker, compute_worker_thread_entry_point); - os_set_thread_name(worker->handle, s8("[compute]")); + worker->handle = os_create_thread("[compute]", worker, compute_worker_thread_entry_point); GLWorkerThreadContext *upload = &ctx->upload_worker; BeamformerUploadThreadContext *upctx = push_struct(&memory, typeof(*upctx)); upload->user_context = (iptr)upctx; upctx->rf_buffer = &cs->rf_buffer; - 
upctx->shared_memory = &ctx->shared_memory; + upctx->shared_memory = ctx->shared_memory; upctx->compute_timing_table = ctx->compute_timing_table; upctx->compute_worker_sync = &ctx->compute_worker.sync_variable; upload->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle); - upload->handle = os_create_thread((iptr)upload, beamformer_upload_entry_point); - os_set_thread_name(upload->handle, s8("[upload]")); + upload->handle = os_create_thread("[upload]", upload, beamformer_upload_entry_point); glfwMakeContextCurrent(raylib_window_handle); @@ -339,7 +385,7 @@ beamformer_init(BeamformerInput *input) for EachElement(beamformer_reloadable_compute_shader_info_indices, it) { i32 index = beamformer_reloadable_compute_shader_info_indices[it]; Arena temp = scratch; - s8 file = push_s8_from_parts(&temp, s8(OS_PATH_SEPARATOR), s8("shaders"), + s8 file = push_s8_from_parts(&temp, os_path_separator(), s8("shaders"), beamformer_reloadable_shader_files[index]); BeamformerFileReloadContext *frc = push_struct(&memory, typeof(*frc)); frc->kind = BeamformerFileReloadKind_ComputeShader; @@ -507,10 +553,11 @@ beamformer_init(BeamformerInput *input) memory.end = scratch.end; ctx->arena = memory; + ctx->state = BeamformerState_Running; } -function void -beamformer_invalidate_shared_memory(void *memory) +BEAMFORMER_EXPORT void +beamformer_terminate(BeamformerInput *input) { /* NOTE(rnp): work around pebkac when the beamformer is closed while we are doing live * imaging. if the verasonics is blocked in an external function (calling the library @@ -519,14 +566,31 @@ beamformer_invalidate_shared_memory(void *memory) * into an error state and release dispatch lock so that future calls will error instead * of blocking. 
*/ - BeamformerCtx *ctx = BeamformerContextMemory(memory); - BeamformerSharedMemory *sm = ctx->shared_memory.region; - BeamformerSharedMemoryLockKind lock = BeamformerSharedMemoryLockKind_DispatchCompute; - atomic_store_u32(&sm->invalid, 1); - atomic_store_u32(&sm->external_work_queue.ridx, sm->external_work_queue.widx); - DEBUG_DECL(if (sm->locks[lock])) { - os_shared_memory_region_unlock(&ctx->shared_memory, sm->locks, (i32)lock); + BeamformerCtx * ctx = BeamformerContextMemory(input->memory); + BeamformerSharedMemory * sm = input->shared_memory; + if (ctx->state != BeamformerState_Terminated) { + if (sm) { + BeamformerSharedMemoryLockKind lock = BeamformerSharedMemoryLockKind_DispatchCompute; + atomic_store_u32(&sm->invalid, 1); + atomic_store_u32(&sm->external_work_queue.ridx, sm->external_work_queue.widx); + DEBUG_DECL(if (sm->locks[lock])) { + beamformer_shared_memory_release_lock(sm, (i32)lock); + } + + atomic_or_u32(&sm->live_imaging_dirty_flags, BeamformerLiveImagingDirtyFlags_StopImaging); + } + + beamformer_debug_ui_deinit(ctx); + + ctx->state = BeamformerState_Terminated; } +} - atomic_or_u32(&sm->live_imaging_dirty_flags, BeamformerLiveImagingDirtyFlags_StopImaging); +BEAMFORMER_EXPORT u32 +beamformer_should_close(BeamformerInput *input) +{ + BeamformerCtx * ctx = BeamformerContextMemory(input->memory); + if (ctx->state == BeamformerState_ShouldClose) + beamformer_terminate(input); + return ctx->state == BeamformerState_Terminated; } diff --git a/beamformer.h b/beamformer.h @@ -63,23 +63,52 @@ /////////////////// // REQUIRED OS API -#define BeamformerInvalidHandle (BeamformerLibraryHandle){-1} -typedef struct { uint64_t value[1]; } BeamformerLibraryHandle; +#define OSInvalidHandleValue ((u64)-1) +typedef struct { uint64_t value[1]; } OSBarrier; +typedef struct { uint64_t value[1]; } OSHandle; +typedef struct { uint64_t value[1]; } OSLibrary; +typedef struct { uint64_t value[1]; } OSThread; +typedef struct { uint64_t value[1]; } OSW32Semaphore; -#define 
BEAMFORMER_OS_ADD_FILE_WATCH_FN(name) void name(char *path, int64_t path_length, void *user_context) -BEAMFORMER_IMPORT BEAMFORMER_OS_ADD_FILE_WATCH_FN(os_add_file_watch); +typedef uint64_t os_thread_entry_point_fn(void *user_context); -#define BEAMFORMER_OS_LOOKUP_SYMBOL_FN(name) void *name(BeamformerLibraryHandle library, char *symbol, Stream *error) -BEAMFORMER_IMPORT BEAMFORMER_OS_LOOKUP_SYMBOL_FN(os_lookup_symbol); +typedef struct { + uint64_t timer_frequency; + + uint32_t logical_processor_count; + uint32_t page_size; + + uint8_t path_separator_byte; +} OSSystemInfo; + +BEAMFORMER_IMPORT OSSystemInfo * os_get_system_info(void); + +BEAMFORMER_IMPORT OSThread os_create_thread(const char *name, void *user_context, os_thread_entry_point_fn *fn); +BEAMFORMER_IMPORT OSBarrier os_barrier_alloc(uint32_t thread_count); +BEAMFORMER_IMPORT void os_barrier_enter(OSBarrier); + +BEAMFORMER_IMPORT void os_add_file_watch(const char *path, int64_t path_length, void *user_context); +BEAMFORMER_IMPORT int64_t os_read_entire_file(const char *file, void *buffer, int64_t buffer_capacity); + +BEAMFORMER_IMPORT void * os_lookup_symbol(OSLibrary library, const char *symbol); -function void os_barrier_wait(Barrier); -function iptr os_error_handle(void); -function s8 os_path_separator(void); -function OS_READ_WHOLE_FILE_FN(os_read_whole_file); -function OS_SHARED_MEMORY_LOCK_REGION_FN(os_shared_memory_region_lock); -function OS_SHARED_MEMORY_UNLOCK_REGION_FN(os_shared_memory_region_unlock); -function OS_WAKE_WAITERS_FN(os_wake_waiters); -function OS_WRITE_FILE_FN(os_write_file); +/* NOTE(rnp): memory watch timed waiting functions. (-1) is an infinite timeout. the beamformer + * will use these with the intention of yielding the thread back to the OS. 
*/ +BEAMFORMER_IMPORT uint32_t os_wait_on_address(int32_t *lock, int32_t current, uint32_t timeout_ms); +BEAMFORMER_IMPORT void os_wake_all_waiters(int32_t *lock); + +// NOTE(rnp): eventually logging will just be done internally +BEAMFORMER_IMPORT void os_console_log(uint8_t *data, int64_t length); +BEAMFORMER_IMPORT void os_fatal(uint8_t *data, int64_t length); + +/* NOTE(rnp): this functionality is only needed on win32 to provide cross process + * synchronization. While posix has equivalent functionality there is no reason to + * use it over a value located in shared memory. */ +#if defined(_WIN32) +BEAMFORMER_IMPORT OSW32Semaphore os_w32_create_semaphore(const char *name, int32_t initial_count, int32_t maximum_count); +BEAMFORMER_IMPORT uint32_t os_w32_semaphore_wait(OSW32Semaphore, uint32_t timeout_ms); +BEAMFORMER_IMPORT void os_w32_semaphore_release(OSW32Semaphore, int32_t count); +#endif ////////////////////////////// // BEAMFORMER APPLICATION API @@ -108,11 +137,24 @@ typedef struct { } BeamformerInputEvent; typedef struct { + /* NOTE(rnp): besides vulkan library code the beamformer will not allocate memory on its + * own. Recommended minimum size is 16MB. If shared memory is not provided it is recommended + * to increase this to at least 1GB to help facilitate loading of external data files (not yet + * implemented). */ void * memory; uint64_t memory_size; + /* NOTE(rnp): beamformer will use this to communicate with external processes. While it + * it won't be required in the future it is currently the only way to load data. + * Recommended size is 2-4GB. Currently this size will also limit the size of any data + * another process wishes to export. The name is required for listing in the UI so that + * users of external processes can open the region on their end. 
*/ + void * shared_memory; + uint64_t shared_memory_size; + uint8_t * shared_memory_name; + uint32_t shared_memory_name_length; + uint64_t timer_ticks; - uint64_t timer_frequency; float mouse_x; float mouse_y; @@ -126,7 +168,7 @@ typedef struct { /* NOTE(rnp): the beamformer is not allowed to dynamically load libraries * itself. Libraries are optional and the beamformer will not use features * from libraries which have not been provided. */ - BeamformerLibraryHandle cuda_library_handle; + OSLibrary cuda_library_handle; #if BEAMFORMER_RENDERDOC_HOOKS void *renderdoc_start_frame_capture; @@ -136,16 +178,25 @@ typedef struct { BEAMFORMER_EXPORT void beamformer_init(BeamformerInput *); -#define BEAMFORMER_FRAME_STEP_FN(name) void name(BeamformerInput *input) -typedef BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step_fn); - -#define BEAMFORMER_DEBUG_UI_DEINIT_FN(name) void name(void *memory) -typedef BEAMFORMER_DEBUG_UI_DEINIT_FN(beamformer_debug_ui_deinit_fn); - -function void beamformer_invalidate_shared_memory(void *memory); +/* NOTE(rnp): while the platform can also decide to terminate the beamformer, + * the beamformer itself may indicate that it wants to terminate. If the + * beamformer itself decides to terminate it is unnecessary to call + * `beamformer_terminate()` but it will act as a NOP if you do. */ +BEAMFORMER_EXPORT uint32_t beamformer_should_close(BeamformerInput *); + +/* IMPORTANT(rnp): since the beamformer may be interacting with external hardware + * it is critical that the platform calls this when it wishes to terminate the + * beamformer. Otherwise the external hardware may be left in a bad state and require + * a reboot. The beamformer will not waste time releasing resources unless it was + * compiled with BEAMFORMER_DEBUG enabled (useful for address sanitizer). 
*/ +BEAMFORMER_EXPORT void beamformer_terminate(BeamformerInput *); + +#if !BEAMFORMER_DEBUG +BEAMFORMER_EXPORT void beamformer_frame_step(BeamformerInput *); +#endif #if BEAMFORMER_DEBUG -BEAMFORMER_EXPORT void beamformer_debug_hot_reload(BeamformerLibraryHandle, BeamformerInput *); +BEAMFORMER_EXPORT void beamformer_debug_hot_reload(OSLibrary new_library, BeamformerInput *); #endif #endif /*BEAMFORMER_H */ diff --git a/beamformer_core.c b/beamformer_core.c @@ -22,6 +22,12 @@ * [ ]: bug: reinit cuda on hot-reload */ +#include "compiler.h" + +#if defined(BEAMFORMER_DEBUG) && !defined(BEAMFORMER_EXPORT) && OS_WINDOWS + #define BEAMFORMER_EXPORT __declspec(dllexport) +#endif + #include "beamformer_internal.h" global f32 dt_for_frame; @@ -78,7 +84,7 @@ beamformer_compute_plan_for_block(BeamformerComputeContext *cc, u32 block, Arena assert(block < countof(cc->compute_plans)); BeamformerComputePlan *result = cc->compute_plans[block]; - assert(!arena && result); + assert(result || arena); if (!result) { result = SLLPopFreelist(cc->compute_plan_freelist); @@ -284,7 +290,7 @@ alloc_shader_storage(BeamformerCtx *ctx, u32 decoded_data_size, Arena arena) * decode should just take the texture as a parameter. Third, none of these dimensions * need to be pre-known by the library unless its allocating GPU memory which it shouldn't * need to do. 
For now grab out of parameter block 0 but it is not correct */ - BeamformerParameterBlock *pb = beamformer_parameter_block(ctx->shared_memory.region, 0); + BeamformerParameterBlock *pb = beamformer_parameter_block(ctx->shared_memory, 0); /* NOTE(rnp): these are stubs when CUDA isn't supported */ cuda_register_buffers(cc->ping_pong_ssbos, countof(cc->ping_pong_ssbos), cc->rf_buffer.ssbo); u32 decoded_data_dimension[3] = {pb->parameters.sample_count, pb->parameters.channel_count, pb->parameters.acquisition_count}; @@ -794,11 +800,10 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl stream_append_s8(&shader_stream, beamformer_shader_data[reloadable_index]); shader_text = arena_stream_commit(&arena, &shader_stream); } else { - shader_text = arena_stream_commit(&arena, &shader_stream); - s8 file_text = os_read_whole_file(&arena, (c8 *)path.data); - - assert(shader_text.data + shader_text.len == file_text.data); - shader_text.len += file_text.len; + shader_text = arena_stream_commit(&arena, &shader_stream); + i64 length = os_read_entire_file((c8 *)path.data, arena.beg, arena_capacity(&arena, u8)); + shader_text.len += length; + arena_commit(&arena, length); } /* TODO(rnp): instance name */ @@ -813,7 +818,7 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl function void beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 block, Arena arena) { - BeamformerParameterBlock *pb = beamformer_parameter_block_lock(&ctx->shared_memory, block, -1); + BeamformerParameterBlock *pb = beamformer_parameter_block_lock(ctx->shared_memory, block, -1); for EachBit(pb->dirty_regions, region) { switch (region) { case BeamformerParameterBlockRegion_ComputePipeline: @@ -886,7 +891,7 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, }break; } } - beamformer_parameter_block_unlock(&ctx->shared_memory, block); + beamformer_parameter_block_unlock(ctx->shared_memory, 
block); } function void @@ -1090,9 +1095,9 @@ shader_text_with_header(s8 header, s8 filepath, b32 has_file, BeamformerShaderKi } else { result = arena_stream_commit(arena, &sb); if (has_file) { - s8 file = os_read_whole_file(arena, (c8 *)filepath.data); - assert(file.data == result.data + result.len); - result.len += file.len; + i64 length = os_read_entire_file((c8 *)filepath.data, arena->beg, arena_capacity(arena, u8)); + result.len += length; + arena_commit(arena, length); } } @@ -1135,8 +1140,8 @@ beamformer_reload_shader(BeamformerCtx *ctx, BeamformerShaderReloadContext *src, function void complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_context) { - BeamformerComputeContext *cs = &ctx->compute_context; - BeamformerSharedMemory *sm = ctx->shared_memory.region; + BeamformerComputeContext * cs = &ctx->compute_context; + BeamformerSharedMemory * sm = ctx->shared_memory; BeamformWork *work = beamform_work_queue_pop(q); while (work) { @@ -1144,8 +1149,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c switch (work->kind) { case BeamformerWorkKind_ExportBuffer:{ /* TODO(rnp): better way of handling DispatchCompute barrier */ - post_sync_barrier(&ctx->shared_memory, BeamformerSharedMemoryLockKind_DispatchCompute, sm->locks); - os_shared_memory_region_lock(&ctx->shared_memory, sm->locks, (i32)work->lock, (u32)-1); + post_sync_barrier(ctx->shared_memory, BeamformerSharedMemoryLockKind_DispatchCompute); + beamformer_shared_memory_take_lock(ctx->shared_memory, (i32)work->lock, (u32)-1); BeamformerExportContext *ec = &work->export_context; switch (ec->kind) { case BeamformerExportKind_BeamformedData:{ @@ -1171,8 +1176,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c }break; InvalidDefaultCase; } - os_shared_memory_region_unlock(&ctx->shared_memory, sm->locks, (i32)work->lock); - post_sync_barrier(&ctx->shared_memory, BeamformerSharedMemoryLockKind_ExportSync, sm->locks); + 
beamformer_shared_memory_release_lock(ctx->shared_memory, work->lock); + post_sync_barrier(ctx->shared_memory, BeamformerSharedMemoryLockKind_ExportSync); }break; case BeamformerWorkKind_CreateFilter:{ /* TODO(rnp): this should probably get deleted and moved to lazy loading */ @@ -1201,7 +1206,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c atomic_store_u32(&ctx->ui_dirty_parameter_blocks, (u32)(ctx->beamform_work_queue != q) << block); } - post_sync_barrier(&ctx->shared_memory, work->lock, sm->locks); + post_sync_barrier(ctx->shared_memory, work->lock); u32 dirty_programs = atomic_swap_u32(&cp->dirty_programs, 0); static_assert(ISPOWEROF2(BeamformerMaxComputeShaderStages), @@ -1366,7 +1371,7 @@ coalesce_timing_table(ComputeTimingTable *t, ComputeShaderStats *stats) DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute) { BeamformerCtx *ctx = (BeamformerCtx *)user_context; - BeamformerSharedMemory *sm = ctx->shared_memory.region; + BeamformerSharedMemory *sm = ctx->shared_memory; complete_queue(ctx, &sm->external_work_queue, arena, gl_context); complete_queue(ctx, ctx->beamform_work_queue, arena, gl_context); } @@ -1395,7 +1400,7 @@ beamformer_rf_buffer_allocate(BeamformerRFBuffer *rf, u32 rf_size, b32 nvidia) DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload) { - BeamformerSharedMemory *sm = ctx->shared_memory->region; + BeamformerSharedMemory *sm = ctx->shared_memory; BeamformerSharedMemoryLockKind scratch_lock = BeamformerSharedMemoryLockKind_ScratchSpace; BeamformerSharedMemoryLockKind upload_lock = BeamformerSharedMemoryLockKind_UploadRF; @@ -1403,7 +1408,7 @@ DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload) if (atomic_load_u32(sm->locks + upload_lock) && (rf_block_rf_size = atomic_swap_u64(&sm->rf_block_rf_size, 0))) { - os_shared_memory_region_lock(ctx->shared_memory, sm->locks, (i32)scratch_lock, (u32)-1); + beamformer_shared_memory_take_lock(ctx->shared_memory, (i32)scratch_lock, 
(u32)-1); BeamformerRFBuffer *rf = ctx->rf_buffer; BeamformerParameterBlock *b = beamformer_parameter_block(sm, (u32)(rf_block_rf_size >> 32ULL)); @@ -1438,8 +1443,8 @@ DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload) else memory_copy_non_temporal(rf->buffer + slot * rf->active_rf_size, data, size); store_fence(); - os_shared_memory_region_unlock(ctx->shared_memory, sm->locks, (i32)scratch_lock); - post_sync_barrier(ctx->shared_memory, upload_lock, sm->locks); + beamformer_shared_memory_release_lock(ctx->shared_memory, (i32)scratch_lock); + post_sync_barrier(ctx->shared_memory, upload_lock); if (!nvidia) glFlushMappedNamedBufferRange(rf->ssbo, slot * rf->active_rf_size, (i32)rf->active_rf_size); @@ -1447,7 +1452,7 @@ DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload) atomic_store_u64(rf->upload_syncs + slot, glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0)); atomic_store_u64(rf->compute_syncs + slot, 0); - os_wake_waiters(ctx->compute_worker_sync); + os_wake_all_waiters(ctx->compute_worker_sync); ComputeTimingInfo info = {.kind = ComputeTimingInfoKind_RF_Data}; glGetQueryObjectui64v(rf->data_timestamp_query, GL_QUERY_RESULT, &info.timer_count); @@ -1459,17 +1464,16 @@ DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload) function void beamformer_queue_compute(BeamformerCtx *ctx, BeamformerFrame *frame, u32 parameter_block) { - BeamformerSharedMemory *sm = ctx->shared_memory.region; + BeamformerSharedMemory *sm = ctx->shared_memory; BeamformerSharedMemoryLockKind dispatch_lock = BeamformerSharedMemoryLockKind_DispatchCompute; - if (!sm->live_imaging_parameters.active && - os_shared_memory_region_lock(&ctx->shared_memory, sm->locks, (i32)dispatch_lock, 0)) + if (!sm->live_imaging_parameters.active && beamformer_shared_memory_take_lock(sm, (i32)dispatch_lock, 0)) { BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); BeamformerViewPlaneTag tag = frame ? 
frame->view_plane_tag : 0; if (fill_frame_compute_work(ctx, work, tag, parameter_block, 0)) beamform_work_queue_push_commit(ctx->beamform_work_queue); } - os_wake_waiters(&ctx->compute_worker.sync_variable); + os_wake_all_waiters(&ctx->compute_worker.sync_variable); } #include "ui.c" @@ -1500,7 +1504,7 @@ beamformer_process_input_events(BeamformerCtx *ctx, BeamformerInput *input, beamformer_reload_shader(ctx, src, ctx->arena, beamformer_shader_names[kind]); }break; case BeamformerFileReloadKind_ComputeShader:{ - BeamformerSharedMemory *sm = ctx->shared_memory.region; + BeamformerSharedMemory *sm = ctx->shared_memory; u32 reserved_blocks = sm->reserved_parameter_blocks; for (u32 block = 0; block < reserved_blocks; block++) { @@ -1524,11 +1528,12 @@ beamformer_process_input_events(BeamformerCtx *ctx, BeamformerInput *input, } } -DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) +BEAMFORMER_EXPORT void +beamformer_frame_step(BeamformerInput *input) { BeamformerCtx *ctx = BeamformerContextMemory(input->memory); - dt_for_frame = (f64)(input->timer_ticks) / input->timer_frequency; + dt_for_frame = (f64)(input->timer_ticks) / os_get_system_info()->timer_frequency; if (IsWindowResized()) { ctx->window_size.h = GetScreenHeight(); @@ -1539,11 +1544,11 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) beamformer_process_input_events(ctx, input, input->event_queue, input->event_count); - BeamformerSharedMemory *sm = ctx->shared_memory.region; + BeamformerSharedMemory *sm = ctx->shared_memory; if (atomic_load_u32(sm->locks + BeamformerSharedMemoryLockKind_UploadRF)) - os_wake_waiters(&ctx->upload_worker.sync_variable); + os_wake_all_waiters(&ctx->upload_worker.sync_variable); if (atomic_load_u32(sm->locks + BeamformerSharedMemoryLockKind_DispatchCompute)) - os_wake_waiters(&ctx->compute_worker.sync_variable); + os_wake_all_waiters(&ctx->compute_worker.sync_variable); BeamformerFrame *frame = ctx->latest_frame; BeamformerViewPlaneTag tag = frame? 
frame->view_plane_tag : 0; @@ -1551,12 +1556,3 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) ctx->frame_view_render_context.updated = 0; } - -/* NOTE(rnp): functions defined in these shouldn't be visible to the whole program */ -#if _DEBUG - #if OS_LINUX - #include "os_linux.c" - #elif OS_WINDOWS - #include "os_win32.c" - #endif -#endif diff --git a/beamformer_internal.h b/beamformer_internal.h @@ -2,9 +2,9 @@ #ifndef BEAMFORMER_INTERNAL_H #define BEAMFORMER_INTERNAL_H -#include "util.h" - #include "beamformer.h" + +#include "util.h" #include "opengl.h" #include "generated/beamformer.meta.c" @@ -15,9 +15,12 @@ #include "threads.c" #include "util_gl.c" +#include "util_os.c" #define beamformer_info(s) s8("[info] " s "\n") +#define os_path_separator() (s8){.data = &os_get_system_info()->path_separator_byte, .len = 1} + /////////////////////////////// // NOTE: CUDA Library Bindings @@ -251,10 +254,10 @@ typedef struct { } ComputeTimingTable; typedef struct { - BeamformerRFBuffer *rf_buffer; - SharedMemoryRegion *shared_memory; - ComputeTimingTable *compute_timing_table; - i32 *compute_worker_sync; + BeamformerRFBuffer * rf_buffer; + BeamformerSharedMemory * shared_memory; + ComputeTimingTable * compute_timing_table; + i32 * compute_worker_sync; } BeamformerUploadThreadContext; struct BeamformerFrame { @@ -281,6 +284,26 @@ struct BeamformerFrame { }; typedef struct { + OSThread handle; + + Arena arena; + iptr window_handle; + iptr gl_context; + iptr user_context; + i32 sync_variable; + b32 awake; +} GLWorkerThreadContext; + +typedef enum { + BeamformerState_Uninitialized = 0, + BeamformerState_Running, + BeamformerState_ShouldClose, + BeamformerState_Terminated, +} BeamformerState; + +typedef struct { + BeamformerState state; + iv2 window_size; Arena arena; @@ -301,7 +324,7 @@ typedef struct { ComputeShaderStats *compute_shader_stats; ComputeTimingTable *compute_timing_table; - SharedMemoryRegion shared_memory; + BeamformerSharedMemory *shared_memory; 
BeamformerFrame beamform_frames[BeamformerMaxSavedFrames]; BeamformerFrame *latest_frame; @@ -344,4 +367,7 @@ typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn); #define BEAMFORMER_RF_UPLOAD_FN(name) void name(BeamformerUploadThreadContext *ctx) typedef BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload_fn); +#define BEAMFORMER_DEBUG_UI_DEINIT_FN(name) void name(BeamformerCtx *ctx) +typedef BEAMFORMER_DEBUG_UI_DEINIT_FN(beamformer_debug_ui_deinit_fn); + #endif /* BEAMFORMER_INTERNAL_H */ diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c @@ -213,6 +213,34 @@ beamform_work_queue_push_commit(BeamformWorkQueue *q) atomic_add_u64(&q->queue, 1); } +#if OS_WINDOWS +// NOTE(rnp): junk needed on w32 to watch a value across processes while yielding +// control back to the kernel. There are user level CPU instructions that allow +// this so why w32 can't do it in kernel mode sounds like shitty design to me. +DEBUG_IMPORT OSW32Semaphore os_w32_shared_memory_semaphores[countof(((BeamformerSharedMemory *)0)->locks)]; +#endif + +function b32 +beamformer_shared_memory_take_lock(BeamformerSharedMemory *sm, i32 lock, u32 timeout_ms) +{ +#if OS_WINDOWS + b32 result = os_w32_semaphore_wait(os_w32_shared_memory_semaphores[lock], timeout_ms); + if (result) atomic_store_u32(sm->locks + lock, 1); +#else + b32 result = take_lock(sm->locks + lock, timeout_ms); +#endif + return result; +} + +function void +beamformer_shared_memory_release_lock(BeamformerSharedMemory *sm, i32 lock) +{ + release_lock(sm->locks + lock); +#if OS_WINDOWS + os_w32_semaphore_release(os_w32_shared_memory_semaphores[lock], 1); +#endif +} + function BeamformerParameterBlock * beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block) { @@ -229,22 +257,20 @@ beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block) } function BeamformerParameterBlock * -beamformer_parameter_block_lock(SharedMemoryRegion *sm, u32 block, i32 timeout_ms) 
+beamformer_parameter_block_lock(BeamformerSharedMemory *sm, u32 block, i32 timeout_ms) { assert(block < BeamformerMaxParameterBlockSlots); - BeamformerSharedMemory *b = sm->region; BeamformerParameterBlock *result = 0; - if (os_shared_memory_region_lock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block, (u32)timeout_ms)) - result = beamformer_parameter_block(sm->region, block); + if (beamformer_shared_memory_take_lock(sm, BeamformerSharedMemoryLockKind_Count + block, (u32)timeout_ms)) + result = beamformer_parameter_block(sm, block); return result; } function void -beamformer_parameter_block_unlock(SharedMemoryRegion *sm, u32 block) +beamformer_parameter_block_unlock(BeamformerSharedMemory *sm, u32 block) { assert(block < BeamformerMaxParameterBlockSlots); - BeamformerSharedMemory *b = sm->region; - os_shared_memory_region_unlock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block); + beamformer_shared_memory_release_lock(sm, BeamformerSharedMemoryLockKind_Count + block); } function Arena @@ -265,11 +291,11 @@ mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, Beamfor } function void -post_sync_barrier(SharedMemoryRegion *sm, BeamformerSharedMemoryLockKind lock, i32 *locks) +post_sync_barrier(BeamformerSharedMemory *sm, BeamformerSharedMemoryLockKind lock) { /* NOTE(rnp): debug: here it is not a bug to release the lock if it * isn't held but elswhere it is */ - DEBUG_DECL(if (locks[lock])) { - os_shared_memory_region_unlock(sm, locks, (i32)lock); + DEBUG_DECL(if (sm->locks[lock])) { + beamformer_shared_memory_release_lock(sm, lock); } } diff --git a/build.c b/build.c @@ -8,6 +8,7 @@ * [ ]: msvc build doesn't detect out of date files correctly * [ ]: seperate dwarf debug info */ + #include "util.h" #include <stdarg.h> @@ -48,6 +49,8 @@ global char *g_argv0; #define is_gcc COMPILER_GCC #define is_msvc COMPILER_MSVC +#define BEAMFORMER_IMPORT function + #if OS_LINUX #include <dirent.h> @@ -172,6 +175,15 @@ 
build_fatal_(char *format, ...) os_exit(1); } +function s8 +read_entire_file(const char *file, Arena *arena) +{ + s8 result = {0}; + result.len = os_read_entire_file(file, arena->beg, arena_capacity(arena, u8)); + if (result.len) result.data = arena_commit(arena, result.len); + return result; +} + function b32 s8_equal(s8 a, s8 b) { @@ -1411,7 +1423,7 @@ meta_entry_extract_scope(MetaEntry *base, iz entry_count) function MetaEntryStack meta_entry_stack_from_file(Arena *arena, char *file) { - MetaParser parser = {.p.s = os_read_whole_file(arena, file)}; + MetaParser parser = {.p.s = read_entire_file(file, arena)}; MetaEntryStack result = {.raw = parser.p.s}; compiler_file = file; @@ -2593,7 +2605,7 @@ metagen_run_emit(MetaprogramContext *m, MetaContext *ctx, MetaEmitOperationList case MetaEmitOperationKind_FileBytes:{ Arena scratch = m->scratch; s8 filename = push_s8_from_parts(&scratch, s8(OS_PATH_SEPARATOR), ctx->directory, op->string); - s8 file = os_read_whole_file(&scratch, (c8 *)filename.data); + s8 file = read_entire_file((c8 *)filename.data, &scratch); m->indentation_level++; metagen_push_byte_array(m, file); m->indentation_level--; @@ -2834,7 +2846,7 @@ meta_push_shader_bake(MetaprogramContext *m, MetaContext *ctx) Arena scratch = m->scratch; s8 filename = push_s8_from_parts(&scratch, s8(OS_PATH_SEPARATOR), s8("shaders"), ctx->base_shaders.data[shader].file); - s8 file = os_read_whole_file(&scratch, (c8 *)filename.data); + s8 file = read_entire_file((c8 *)filename.data, &scratch); metagen_push_byte_array(m, file); } meta_end_scope(m, s8("};\n")); } @@ -3322,8 +3334,8 @@ metagen_emit_helper_library_header(MetaContext *ctx, Arena arena) build_log_generate("Library Header"); - s8 parameters_header = os_read_whole_file(&arena, "beamformer_parameters.h"); - s8 base_header = os_read_whole_file(&arena, "lib/ogl_beamformer_lib_base.h"); + s8 parameters_header = read_entire_file("beamformer_parameters.h", &arena); + s8 base_header = 
read_entire_file("lib/ogl_beamformer_lib_base.h", &arena); MetaprogramContext m[1] = {{.stream = arena_stream(arena), .scratch = ctx->scratch}}; @@ -3524,8 +3536,6 @@ metagen_file_direct(Arena arena, char *filename) i32 main(i32 argc, char *argv[]) { - os_common_init(); - u64 start_time = os_get_timer_counter(); g_argv0 = argv[0]; diff --git a/compiler.h b/compiler.h @@ -60,4 +60,13 @@ #define ARCH_ARM64 0 #endif +/* NOTE: glibc devs are actually buffoons who never write any real code. even the most + * basic headers like stdint.h will pull in every other header in libc and most + * useful functions are hidden behind this dogshit macro gaurd */ +#if OS_LINUX + #ifndef _GNU_SOURCE + #define _GNU_SOURCE + #endif +#endif + #endif /* COMPILER_H */ diff --git a/lib/ogl_beamformer_lib.c b/lib/ogl_beamformer_lib.c @@ -1,7 +1,12 @@ /* See LICENSE for license details. */ #include "../compiler.h" +#define BEAMFORMER_IMPORT static + +#include "../beamformer.h" + #include "../util.h" + #include "../generated/beamformer.meta.c" #include "../beamformer_parameters.h" #include "ogl_beamformer_lib_base.h" @@ -17,10 +22,10 @@ W32(iptr) OpenFileMappingA(u32, b32, c8 *); #error Unsupported Platform #endif +#include "../util_os.c" #include "../beamformer_shared_memory.c" global struct { - SharedMemoryRegion shared_memory; BeamformerSharedMemory *bp; i32 timeout_ms; BeamformerLibErrorKind last_error; @@ -28,21 +33,14 @@ global struct { #if OS_LINUX -function b32 -os_reserve_region_locks(iptr os_context, u32 count) -{ - b32 result = count <= BeamformerMaxParameterBlockSlots; - return result; -} - -function SharedMemoryRegion +function void * os_open_shared_memory_area(char *name) { - SharedMemoryRegion result = {0}; + void *result = 0; i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR); if (fd > 0) { void *new = mmap(0, BEAMFORMER_SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (new != MAP_FAILED) result.region = new; + if (new != MAP_FAILED) result = new; close(fd); } 
return result; @@ -50,60 +48,47 @@ os_open_shared_memory_area(char *name) #elif OS_WINDOWS -function b32 -os_reserve_region_locks(iptr os_context, u32 count) -{ - local_persist iptr semaphores[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots]; - w32_shared_memory_context *ctx = (typeof(ctx))os_context; +W32(b32) UnmapViewOfFile(void *); - b32 result = count <= BeamformerMaxParameterBlockSlots; - if (result) { - count += BeamformerSharedMemoryLockKind_Count; - if (count > ctx->reserved_count) { - u8 buffer[1024]; - Stream sb = {.data = buffer, .cap = countof(buffer)}; - stream_append_s8(&sb, s8(OS_SHARED_MEMORY_NAME "_lock_")); - - u32 new_reserved_count; - for (new_reserved_count = ctx->reserved_count; - new_reserved_count < count && result; - new_reserved_count++) - { - Stream lb = sb; - stream_append_u64(&lb, new_reserved_count); - stream_append_byte(&lb, 0); - semaphores[new_reserved_count] = CreateSemaphoreA(0, 1, 1, (c8 *)lb.data); - result &= semaphores[new_reserved_count] != INVALID_FILE; - } +function b32 +os_reserve_region_locks(void) +{ + u8 buffer[1024]; + Stream sb = {.data = buffer, .cap = countof(buffer)}; + stream_append_s8(&sb, s8(OS_SHARED_MEMORY_NAME "_lock_")); + + i32 start_index = sb.widx; + u32 reserved_count = 0; + for EachElement(os_w32_shared_memory_semaphores, it) { + stream_reset(&sb, start_index); + stream_append_u64(&sb, it); + stream_append_byte(&sb, 0); + os_w32_shared_memory_semaphores[it] = os_w32_create_semaphore((c8 *)sb.data, 1, 1); + if InvalidHandle(os_w32_shared_memory_semaphores[it]) + break; + reserved_count++; + } - if (result) { - ctx->semaphores = semaphores; - ctx->reserved_count = count; - } else { - for (u32 j = ctx->reserved_count; j < new_reserved_count; j++) - CloseHandle(semaphores[j]); - } - } else if (count < ctx->reserved_count) { - for (u32 i = ctx->reserved_count; i > count;) - CloseHandle(semaphores[--i]); - ctx->reserved_count = count; - } + b32 result = reserved_count == 
countof(os_w32_shared_memory_semaphores); + if (!result) { + for (u32 i = 0; i < reserved_count; i++) + CloseHandle(os_w32_shared_memory_semaphores[i].value[0]); } + return result; } -function SharedMemoryRegion +function void * os_open_shared_memory_area(char *name) { - local_persist w32_shared_memory_context ctx = {0}; - SharedMemoryRegion result = {0}; iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name); + void *result = 0; if (h != INVALID_FILE) { void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, BEAMFORMER_SHARED_MEMORY_SIZE); - if (new && os_reserve_region_locks((iptr)&ctx, 1)) { - result.region = new; - result.os_context = (iptr)&ctx; - } + if (new && os_reserve_region_locks()) + result = new; + if (new && !result) + UnmapViewOfFile(new); CloseHandle(h); } return result; @@ -123,12 +108,11 @@ lib_error_check(b32 condition, BeamformerLibErrorKind error_kind) function b32 check_shared_memory(void) { - if (!g_beamformer_library_context.shared_memory.region) { - g_beamformer_library_context.shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME); - if (lib_error_check(g_beamformer_library_context.shared_memory.region != 0, BF_LIB_ERR_KIND_SHARED_MEMORY)) { - u32 version = ((BeamformerSharedMemory *)g_beamformer_library_context.shared_memory.region)->version; - if (lib_error_check(version == BEAMFORMER_SHARED_MEMORY_VERSION, BF_LIB_ERR_KIND_VERSION_MISMATCH)) - g_beamformer_library_context.bp = g_beamformer_library_context.shared_memory.region; + if (!g_beamformer_library_context.bp) { + g_beamformer_library_context.bp = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME); + if (lib_error_check(g_beamformer_library_context.bp != 0, BF_LIB_ERR_KIND_SHARED_MEMORY)) { + u32 version = g_beamformer_library_context.bp->version; + lib_error_check(version == BEAMFORMER_SHARED_MEMORY_VERSION, BF_LIB_ERR_KIND_VERSION_MISMATCH); } } @@ -160,9 +144,7 @@ try_push_work_queue(void) function b32 lib_try_lock(i32 lock, i32 timeout_ms) { - b32 result = 
os_shared_memory_region_lock(&g_beamformer_library_context.shared_memory, - g_beamformer_library_context.bp->locks, - lock, (u32)timeout_ms); + b32 result = beamformer_shared_memory_take_lock(g_beamformer_library_context.bp, lock, (u32)timeout_ms); lib_error_check(result, BF_LIB_ERR_KIND_SYNC_VARIABLE); return result; } @@ -170,8 +152,7 @@ lib_try_lock(i32 lock, i32 timeout_ms) function void lib_release_lock(i32 lock) { - os_shared_memory_region_unlock(&g_beamformer_library_context.shared_memory, - g_beamformer_library_context.bp->locks, (i32)lock); + beamformer_shared_memory_release_lock(g_beamformer_library_context.bp, lock); } u32 @@ -214,13 +195,9 @@ beamformer_reserve_parameter_blocks(uint32_t count) { b32 result = 0; if (check_shared_memory() && - lib_error_check(os_reserve_region_locks(g_beamformer_library_context.shared_memory.os_context, count), - BF_LIB_ERR_KIND_PARAMETER_BLOCK_OVERFLOW)) + lib_error_check(count <= BeamformerMaxParameterBlockSlots, BF_LIB_ERR_KIND_PARAMETER_BLOCK_OVERFLOW)) { - u32 old_count = g_beamformer_library_context.bp->reserved_parameter_blocks; g_beamformer_library_context.bp->reserved_parameter_blocks = count; - for (u32 i = old_count; i < count; i++) - zero_struct(beamformer_parameter_block(g_beamformer_library_context.bp, i)); result = 1; } return result; @@ -364,8 +341,7 @@ function void beamformer_flush_commands(void) { i32 lock = BeamformerSharedMemoryLockKind_DispatchCompute; - os_shared_memory_region_lock(&g_beamformer_library_context.shared_memory, - g_beamformer_library_context.bp->locks, lock, 0); + beamformer_shared_memory_take_lock(g_beamformer_library_context.bp, lock, 0); } #define BEAMFORMER_UPLOAD_FNS \ diff --git a/main_linux.c b/main_linux.c @@ -6,13 +6,11 @@ #endif #ifndef BEAMFORMER_DEBUG - #define BEAMFORMER_IMPORT function - #define BEAMFORMER_EXPORT function + #define BEAMFORMER_IMPORT static + #define BEAMFORMER_EXPORT static #endif -#include "util.h" -#include "beamformer.h" - +#include "beamformer.c" 
#include "os_linux.c" #define OS_DEBUG_LIB_NAME "./beamformer.so" @@ -23,52 +21,178 @@ #define OS_RENDERDOC_SONAME "librenderdoc.so" -/* TODO(rnp): what do if not X11? */ -iptr glfwGetGLXContext(iptr); -function iptr -os_get_native_gl_context(iptr window) +#include <dlfcn.h> + +typedef enum { + OSLinux_FileWatchKindPlatform, + OSLinux_FileWatchKindUser, +} OSLinux_FileWatchKind; + +typedef struct { + OSLinux_FileWatchKind kind; + u64 hash; + u64 update_time; + void * user_context; +} OSLinux_FileWatch; + +typedef struct { + u64 hash; + iptr handle; + s8 name; + + OSLinux_FileWatch * data; + iz count; + iz capacity; +} OSLinux_FileWatchDirectory; +DA_STRUCT(OSLinux_FileWatchDirectory, OSLinux_FileWatchDirectory); + +typedef struct { + Arena arena; + i32 arena_lock; + + i32 inotify_handle; + + OSLinux_FileWatchDirectoryList file_watch_list; + + OSSystemInfo system_info; +} OSLinux_Context; +global OSLinux_Context os_linux_context; + +BEAMFORMER_IMPORT OSSystemInfo * +os_get_system_info(void) { - return glfwGetGLXContext(window); + return &os_linux_context.system_info; } -iptr glfwGetProcAddress(char *); -function iptr -os_gl_proc_address(char *name) +BEAMFORMER_IMPORT OSThread +os_create_thread(const char *name, void *user_context, os_thread_entry_point_fn *fn) { - return glfwGetProcAddress(name); + pthread_t thread; + pthread_create(&thread, 0, (void *)fn, (void *)user_context); + + if (name) { + char buffer[16]; + s8 name_str = c_str_to_s8((char *)name); + u64 length = (u64)CLAMP(name_str.len, 0, countof(buffer) - 1); + mem_copy(buffer, (char *)name, length); + buffer[length] = 0; + pthread_setname_np(thread, buffer); + } + + OSThread result = {(u64)thread}; + return result; } -#include "beamformer.c" +BEAMFORMER_IMPORT OSBarrier +os_barrier_alloc(u32 count) +{ + OSBarrier result = {0}; + DeferLoop(take_lock(&os_linux_context.arena_lock, -1), release_lock(&os_linux_context.arena_lock)) + { + pthread_barrier_t *barrier = push_struct(&os_linux_context.arena, 
pthread_barrier_t); + pthread_barrier_init(barrier, 0, count); + result.value[0] = (u64)barrier; + } + return result; +} -#include <dlfcn.h> +BEAMFORMER_IMPORT void +os_barrier_enter(OSBarrier barrier) +{ + pthread_barrier_t *b = (pthread_barrier_t *)barrier.value[0]; + if (b) pthread_barrier_wait(b); +} -BEAMFORMER_IMPORT BEAMFORMER_OS_ADD_FILE_WATCH_FN(os_add_file_watch) +BEAMFORMER_IMPORT void +os_console_log(u8 *data, i64 length) { - s8 path_str = {.data = (u8 *)path, .len = path_length}; - os_linux_add_file_watch(path_str, user_context, OSLinux_FileWatchKindUser); + os_write_file(STDERR_FILENO, data, length); +} + +BEAMFORMER_IMPORT void +os_fatal(u8 *data, i64 length) +{ + os_write_file(STDERR_FILENO, data, length); + os_exit(1); + unreachable(); +} + +BEAMFORMER_IMPORT void * +os_lookup_symbol(OSLibrary library, const char *symbol) +{ + void *result = 0; + if ValidHandle(library) result = dlsym((void *)library.value[0], symbol); + return result; } -BEAMFORMER_IMPORT BEAMFORMER_OS_LOOKUP_SYMBOL_FN(os_lookup_symbol) +function void * +allocate_shared_memory(char *name, iz requested_capacity, u64 *capacity) { + u64 rounded_capacity = round_up_to(requested_capacity, os_linux_context.system_info.page_size); void *result = 0; - if ValidHandle(library) { - result = dlsym((void *)library.value[0], symbol); - if (!result && error) { - stream_append_s8s(error, s8("os_lookup_symbol(\""), c_str_to_s8(symbol), s8("\"): "), - c_str_to_s8(dlerror()), s8("\n")); + i32 fd = shm_open(name, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); + if (fd > 0 && ftruncate(fd, rounded_capacity) != -1) { + void *new = mmap(0, rounded_capacity, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (new != MAP_FAILED) { + *capacity = rounded_capacity; + result = new; } } + if (fd > 0) close(fd); return result; } -function BeamformerLibraryHandle +function OSLinux_FileWatchDirectory * +os_lookup_file_watch_directory(OSLinux_FileWatchDirectoryList *ctx, u64 hash) +{ + OSLinux_FileWatchDirectory *result = 0; + for 
(iz i = 0; !result && i < ctx->count; i++) + if (ctx->data[i].hash == hash) + result = ctx->data + i; + return result; +} + +function void +os_linux_add_file_watch(s8 path, void *user_context, OSLinux_FileWatchKind kind) +{ + s8 directory = path; + directory.len = s8_scan_backwards(path, '/'); + assert(directory.len > 0); + + OSLinux_FileWatchDirectoryList *fwctx = &os_linux_context.file_watch_list; + + u64 hash = u64_hash_from_s8(directory); + OSLinux_FileWatchDirectory *dir = os_lookup_file_watch_directory(fwctx, hash); + if (!dir) { + assert(path.data[directory.len] == '/'); + dir = da_push(&os_linux_context.arena, fwctx); + dir->hash = hash; + dir->name = push_s8(&os_linux_context.arena, directory); + u32 mask = IN_MOVED_TO|IN_CLOSE_WRITE; + dir->handle = inotify_add_watch(os_linux_context.inotify_handle, (c8 *)dir->name.data, mask); + } + + OSLinux_FileWatch *fw = da_push(&os_linux_context.arena, dir); + fw->user_context = user_context; + fw->hash = u64_hash_from_s8(s8_cut_head(path, dir->name.len + 1)); + fw->kind = kind; +} + +BEAMFORMER_IMPORT void +os_add_file_watch(const char *path, int64_t path_length, void *user_context) +{ + s8 path_str = {.data = (u8 *)path, .len = path_length}; + os_linux_add_file_watch(path_str, user_context, OSLinux_FileWatchKindUser); +} + +function OSLibrary load_library(char *name, char *temp_name, u32 flags) { if (temp_name && os_copy_file(name, temp_name)) name = temp_name; - BeamformerLibraryHandle result = {(u64)dlopen(name, flags)}; - if (result.value[0] == 0) result = BeamformerInvalidHandle; + OSLibrary result = {(u64)dlopen(name, flags)}; + if (result.value[0] == 0) result.value[0] = OSInvalidHandleValue; if (temp_name) unlink(temp_name); @@ -79,11 +203,11 @@ load_library(char *name, char *temp_name, u32 flags) function void debug_library_reload(BeamformerInput *input) { - local_persist BeamformerLibraryHandle beamformer_library_handle = BeamformerInvalidHandle; - BeamformerLibraryHandle new_handle = 
load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, RTLD_NOW|RTLD_LOCAL); + local_persist OSLibrary beamformer_library_handle = {OSInvalidHandleValue}; + OSLibrary new_handle = load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, RTLD_NOW|RTLD_LOCAL); - if (!ValidHandle(beamformer_library_handle) && !ValidHandle(new_handle)) - os_fatal(s8("[os] failed to load: " OS_DEBUG_LIB_NAME "\n")); + if (InvalidHandle(beamformer_library_handle) && InvalidHandle(new_handle)) + fatal(s8("[os] failed to load: " OS_DEBUG_LIB_NAME "\n")); if ValidHandle(new_handle) { beamformer_debug_hot_reload(new_handle, input); @@ -107,10 +231,10 @@ load_platform_libraries(BeamformerInput *input) input->cuda_library_handle = load_library(OS_CUDA_LIB_NAME, OS_CUDA_LIB_TEMP_NAME, RTLD_NOW|RTLD_LOCAL); #if BEAMFORMER_RENDERDOC_HOOKS - local_persist BeamformerLibraryHandle renderdoc_handle = BeamformerInvalidHandle; + local_persist OSLibrary renderdoc_handle = {OSInvalidHandleValue}; renderdoc_handle = load_library(OS_RENDERDOC_SONAME, 0, RTLD_NOW|RTLD_LOCAL|RTLD_NOLOAD); if ValidHandle(renderdoc_handle) { - renderdoc_get_api_fn *get_api = os_lookup_symbol(renderdoc_handle, "RENDERDOC_GetAPI", 0); + renderdoc_get_api_fn *get_api = os_lookup_symbol(renderdoc_handle, "RENDERDOC_GetAPI"); if (get_api) { RenderDocAPI *api = 0; if (get_api(10600, (void **)&api)) { @@ -172,18 +296,26 @@ dispatch_file_watch_events(BeamformerInput *input, u64 current_time) extern i32 main(void) { - os_common_init(); + os_linux_context.system_info.timer_frequency = os_get_timer_frequency(); + os_linux_context.system_info.logical_processor_count = (u32)get_nprocs(); + os_linux_context.system_info.page_size = (u32)getpagesize(); + os_linux_context.system_info.path_separator_byte = '/'; Arena program_memory = os_alloc_arena(MB(16) + KB(16)); os_linux_context.arena = sub_arena(&program_memory, KB(16), KB(4)); os_linux_context.inotify_handle = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); + BeamformerInput *input = 
push_struct(&program_memory, BeamformerInput); + input->memory = program_memory.beg; + input->memory_size = program_memory.end - program_memory.beg; + input->shared_memory = allocate_shared_memory(OS_SHARED_MEMORY_NAME, BEAMFORMER_SHARED_MEMORY_SIZE, + &input->shared_memory_size); + if (input->shared_memory) { + input->shared_memory_name = s8(OS_SHARED_MEMORY_NAME).data; + input->shared_memory_name_length = s8(OS_SHARED_MEMORY_NAME).len; + } - BeamformerInput *input = push_struct(&program_memory, BeamformerInput); - input->memory = program_memory.beg; - input->memory_size = program_memory.end - program_memory.beg; - input->timer_frequency = os_get_timer_frequency(); input->event_queue[input->event_count++] = (BeamformerInputEvent){ .kind = BeamformerInputEventKind_ExecutableReload, }; @@ -197,7 +329,7 @@ main(void) fds[0].events = POLLIN; u64 last_time = os_get_timer_counter(); - while (!WindowShouldClose()) { + while (!WindowShouldClose() && !beamformer_should_close(input)) { u64 now = os_get_timer_counter(); poll(fds, countof(fds), 0); @@ -217,8 +349,7 @@ main(void) input->event_count = 0; } - beamformer_invalidate_shared_memory(program_memory.beg); - beamformer_debug_ui_deinit(program_memory.beg); + beamformer_terminate(input); /* NOTE: make sure this will get cleaned up after external * programs release their references */ diff --git a/main_w32.c b/main_w32.c @@ -11,22 +11,40 @@ #define OS_LINUX 0 #define __declspec(x) #define __stdcall + #define _WIN32 #endif #if !OS_WINDOWS #error This file is only meant to be compiled for Win32 #endif -#ifndef BEAMFORMER_DEBUG - #define BEAMFORMER_IMPORT function - #define BEAMFORMER_EXPORT function +#ifdef BEAMFORMER_DEBUG + #define BEAMFORMER_IMPORT __declspec(dllexport) +#else + #define BEAMFORMER_IMPORT static + #define BEAMFORMER_EXPORT static #endif -#include "util.h" -#include "beamformer.h" - +#include "beamformer.c" #include "os_win32.c" +typedef struct { + u32 reserved1; + u32 reserved2; + u64 Reserved3[2]; + u32 
reserved4; + u32 reserved5; +} w32_synchronization_barrier; + +W32(u64) CreateThread(iptr, uz, iptr, iptr, u32, u32 *); +W32(b32) EnterSynchronizationBarrier(w32_synchronization_barrier *, u32); +W32(b32) FreeLibrary(u64); +W32(void *) GetModuleHandleA(const c8 *); +W32(void *) GetProcAddress(u64, const c8 *); +W32(b32) InitializeSynchronizationBarrier(w32_synchronization_barrier *, i32, i32); +W32(void *) LoadLibraryA(const c8 *); +W32(i32) SetThreadDescription(u64, u16 *); + #define OS_DEBUG_LIB_NAME ".\\beamformer.dll" #define OS_DEBUG_LIB_TEMP_NAME ".\\beamformer_temp.dll" @@ -35,64 +53,196 @@ #define OS_RENDERDOC_SONAME "renderdoc.dll" -iptr glfwGetWGLContext(iptr); -function iptr -os_get_native_gl_context(iptr window) +enum {OSW32_FileWatchDirectoryBufferSize = KB(4)}; +typedef enum { + OSW32_FileWatchKindPlatform, + OSW32_FileWatchKindUser, +} OSW32_FileWatchKind; + +typedef struct { + OSW32_FileWatchKind kind; + u64 hash; + u64 update_time; + void * user_context; +} OSW32_FileWatch; + +typedef struct { + u64 hash; + iptr handle; + s8 name; + + OSW32_FileWatch *data; + iz count; + iz capacity; + + w32_overlapped overlapped; + w32_io_completion_event event; + + void *buffer; +} OSW32_FileWatchDirectory; +DA_STRUCT(OSW32_FileWatchDirectory, OSW32_FileWatchDirectory); + +typedef struct { + Arena arena; + i32 arena_lock; + iptr error_handle; + iptr io_completion_handle; + + OSW32_FileWatchDirectoryList file_watch_list; + + OSSystemInfo system_info; +} OSW32_Context; +global OSW32_Context os_w32_context; + +BEAMFORMER_IMPORT OSSystemInfo * +os_get_system_info(void) { - return glfwGetWGLContext(window); + return &os_w32_context.system_info; } -function iptr -os_gl_proc_address(char *name) +BEAMFORMER_IMPORT OSThread +os_create_thread(const char *name, void *user_context, os_thread_entry_point_fn *fn) { - return wglGetProcAddress(name); + OSThread result = {(u64)CreateThread(0, 0, (iptr)fn, (iptr)user_context, 0, 0)}; + if (result.value[0]) { + 
DeferLoop(take_lock(&os_w32_context.arena_lock, -1), release_lock(&os_w32_context.arena_lock)) + { + Arena arena = os_w32_context.arena; + SetThreadDescription(result.value[0], s8_to_s16(&arena, c_str_to_s8((c8 *)name)).data); + } + } else { + result.value[0] = OSInvalidHandleValue; + } + return result; } -#include "beamformer.c" +BEAMFORMER_IMPORT OSBarrier +os_barrier_alloc(u32 count) +{ + OSBarrier result = {0}; + DeferLoop(take_lock(&os_w32_context.arena_lock, -1), release_lock(&os_w32_context.arena_lock)) + { + w32_synchronization_barrier *barrier = push_struct(&os_w32_context.arena, w32_synchronization_barrier); + InitializeSynchronizationBarrier(barrier, (i32)count, -1); + result.value[0] = (u64)barrier; + } + return result; +} -W32(b32) FreeLibrary(u64); -W32(void *) GetModuleHandleA(c8 *); -W32(void *) GetProcAddress(u64, c8 *); -W32(void *) LoadLibraryA(c8 *); +BEAMFORMER_IMPORT void +os_barrier_enter(OSBarrier barrier) +{ + w32_synchronization_barrier *b = (w32_synchronization_barrier *)barrier.value[0]; + if (b) EnterSynchronizationBarrier(b, 0); +} -BEAMFORMER_IMPORT BEAMFORMER_OS_ADD_FILE_WATCH_FN(os_add_file_watch) +BEAMFORMER_IMPORT void +os_console_log(u8 *data, i64 length) { - s8 path_str = {.data = (u8 *)path, .len = path_length}; - os_w32_add_file_watch(path_str, user_context, OSW32_FileWatchKindUser); + os_write_file(os_w32_context.error_handle, data, length); } -BEAMFORMER_IMPORT BEAMFORMER_OS_LOOKUP_SYMBOL_FN(os_lookup_symbol) +BEAMFORMER_IMPORT void +os_fatal(u8 *data, i64 length) +{ + os_write_file(os_w32_context.error_handle, data, length); + os_exit(1); + unreachable(); +} + +BEAMFORMER_IMPORT void * +os_lookup_symbol(OSLibrary library, const char *symbol) { void *result = 0; - if ValidHandle(library) { - result = GetProcAddress(library.value[0], symbol); - if (!result && error) { - stream_append_s8s(error, s8("os_lookup_symbol(\""), c_str_to_s8(symbol), s8("\"): ")); - stream_append_i64(error, GetLastError()); - stream_append_byte(error, 
'\n'); - } + if ValidHandle(library) result = GetProcAddress(library.value[0], symbol); + return result; +} + +function void * +allocate_shared_memory(char *name, iz requested_capacity, u64 *capacity) +{ + u64 rounded_capacity = round_up_to(requested_capacity, os_w32_context.system_info.page_size); + void *result = 0; + iptr h = CreateFileMappingA(-1, 0, PAGE_READWRITE, (rounded_capacity >> 32u), + (rounded_capacity & 0xFFFFFFFFul), name); + if (h != INVALID_FILE) { + result = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, rounded_capacity); + if (result) *capacity = rounded_capacity; } return result; } +function OSW32_FileWatchDirectory * +os_lookup_file_watch_directory(OSW32_FileWatchDirectoryList *ctx, u64 hash) +{ + OSW32_FileWatchDirectory *result = 0; + for (iz i = 0; !result && i < ctx->count; i++) + if (ctx->data[i].hash == hash) + result = ctx->data + i; + return result; +} + +function void +os_w32_add_file_watch(s8 path, void *user_context, OSW32_FileWatchKind kind) +{ + s8 directory = path; + directory.len = s8_scan_backwards(path, '\\'); + assert(directory.len > 0); + + OSW32_FileWatchDirectoryList *fwctx = &os_w32_context.file_watch_list; + + u64 hash = u64_hash_from_s8(directory); + OSW32_FileWatchDirectory *dir = os_lookup_file_watch_directory(fwctx, hash); + if (!dir) { + assert(path.data[directory.len] == '\\'); + + dir = da_push(&os_w32_context.arena, fwctx); + dir->hash = hash; + dir->name = push_s8(&os_w32_context.arena, directory); + dir->handle = CreateFileA((c8 *)dir->name.data, GENERIC_READ, FILE_SHARE_READ, 0, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS|FILE_FLAG_OVERLAPPED, 0); + + dir->event.tag = W32IOEvent_FileWatch; + dir->event.context = (iptr)dir; + CreateIoCompletionPort(dir->handle, os_w32_context.io_completion_handle, (uptr)&dir->event, 0); + + dir->buffer = arena_alloc(&os_w32_context.arena, .size = OSW32_FileWatchDirectoryBufferSize); + ReadDirectoryChangesW(dir->handle, dir->buffer, OSW32_FileWatchDirectoryBufferSize, 0, + 
FILE_NOTIFY_CHANGE_LAST_WRITE, 0, &dir->overlapped, 0); + } + + OSW32_FileWatch *fw = da_push(&os_w32_context.arena, dir); + fw->user_context = user_context; + fw->hash = u64_hash_from_s8(s8_cut_head(path, dir->name.len + 1)); + fw->kind = kind; +} + +BEAMFORMER_IMPORT void +os_add_file_watch(const char *path, int64_t path_length, void *user_context) +{ + s8 path_str = {.data = (u8 *)path, .len = path_length}; + os_w32_add_file_watch(path_str, user_context, OSW32_FileWatchKindUser); +} + #if BEAMFORMER_RENDERDOC_HOOKS -function BeamformerLibraryHandle +function OSLibrary get_module(char *name) { - BeamformerLibraryHandle result = {(u64)GetModuleHandleA(name)}; - if (result.value[0] == 0) result = BeamformerInvalidHandle; + OSLibrary result = {(u64)GetModuleHandleA(name)}; + if (result.value[0] == 0) result.value[0] = OSInvalidHandleValue; return result; } #endif -function BeamformerLibraryHandle +function OSLibrary load_library(char *name, char *temp_name) { if (temp_name && os_copy_file(name, temp_name)) name = temp_name; - BeamformerLibraryHandle result = {(u64)LoadLibraryA(name)}; - if (result.value[0] == 0) result = BeamformerInvalidHandle; + OSLibrary result = {(u64)LoadLibraryA(name)}; + if (result.value[0] == 0) result.value[0] = OSInvalidHandleValue; if (temp_name) DeleteFileA(temp_name); @@ -103,11 +253,11 @@ load_library(char *name, char *temp_name) function void debug_library_reload(BeamformerInput *input) { - local_persist BeamformerLibraryHandle beamformer_library_handle = BeamformerInvalidHandle; - BeamformerLibraryHandle new_handle = load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME); + local_persist OSLibrary beamformer_library_handle = {OSInvalidHandleValue}; + OSLibrary new_handle = load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME); - if (!ValidHandle(beamformer_library_handle) && !ValidHandle(new_handle)) - os_fatal(s8("[os] failed to load: " OS_DEBUG_LIB_NAME "\n")); + if (InvalidHandle(beamformer_library_handle) && 
InvalidHandle(new_handle)) + fatal(s8("[os] failed to load: " OS_DEBUG_LIB_NAME "\n")); if ValidHandle(new_handle) { beamformer_debug_hot_reload(new_handle, input); @@ -131,10 +281,10 @@ load_platform_libraries(BeamformerInput *input) input->cuda_library_handle = load_library(OS_CUDA_LIB_NAME, OS_CUDA_LIB_TEMP_NAME); #if BEAMFORMER_RENDERDOC_HOOKS - local_persist BeamformerLibraryHandle renderdoc_handle = BeamformerInvalidHandle; + local_persist OSLibrary renderdoc_handle = {OSInvalidHandleValue}; renderdoc_handle = get_module(OS_RENDERDOC_SONAME); if ValidHandle(renderdoc_handle) { - renderdoc_get_api_fn *get_api = os_lookup_symbol(renderdoc_handle, "RENDERDOC_GetAPI", 0); + renderdoc_get_api_fn *get_api = os_lookup_symbol(renderdoc_handle, "RENDERDOC_GetAPI"); if (get_api) { RenderDocAPI *api = 0; if (get_api(10600, (void **)&api)) { @@ -160,10 +310,10 @@ dispatch_file_watch(BeamformerInput *input, Arena arena, u64 current_time, OSW32 Stream e = {.data = arena_commit(&arena, KB(1)), .cap = KB(1)}; if (fni->action != FILE_ACTION_MODIFIED) { - stream_append_s8(&e, s8("unknown file watch event: ")); + stream_append_s8(&e, s8("[os] unknown file watch event: ")); stream_append_u64(&e, fni->action); stream_append_byte(&e, '\n'); - os_write_file(os_w32_context.error_handle, stream_to_s8(&e)); + os_write_file(os_w32_context.error_handle, e.data, e.widx); stream_reset(&e, 0); } @@ -222,18 +372,31 @@ clear_io_queue(BeamformerInput *input, Arena arena, u64 current_time) extern i32 main(void) { - os_common_init(); + os_w32_context.error_handle = GetStdHandle(STD_ERROR_HANDLE); + os_w32_context.io_completion_handle = CreateIoCompletionPort(INVALID_FILE, 0, 0, 0); + os_w32_context.system_info.timer_frequency = os_get_timer_frequency(); + os_w32_context.system_info.path_separator_byte = '\\'; + { + w32_system_info info = {0}; + GetSystemInfo(&info); + + os_w32_context.system_info.page_size = info.page_size; + os_w32_context.system_info.logical_processor_count = 
info.number_of_processors; + } Arena program_memory = os_alloc_arena(MB(16) + MB(2)); + os_w32_context.arena = sub_arena(&program_memory, MB(2), os_w32_context.system_info.page_size); + + BeamformerInput *input = push_struct(&program_memory, BeamformerInput); + input->memory = program_memory.beg; + input->memory_size = program_memory.end - program_memory.beg; + input->shared_memory = allocate_shared_memory(OS_SHARED_MEMORY_NAME, BEAMFORMER_SHARED_MEMORY_SIZE, + &input->shared_memory_size); + if (input->shared_memory) { + input->shared_memory_name = s8(OS_SHARED_MEMORY_NAME).data; + input->shared_memory_name_length = s8(OS_SHARED_MEMORY_NAME).len; + } - os_w32_context.arena = sub_arena(&program_memory, MB(2), KB(4)); - os_w32_context.error_handle = GetStdHandle(STD_ERROR_HANDLE); - os_w32_context.io_completion_handle = CreateIoCompletionPort(INVALID_FILE, 0, 0, 0); - - BeamformerInput *input = push_struct(&program_memory, BeamformerInput); - input->memory = program_memory.beg; - input->memory_size = program_memory.end - program_memory.beg; - input->timer_frequency = os_w32_context.timer_frequency; input->event_queue[input->event_count++] = (BeamformerInputEvent){ .kind = BeamformerInputEventKind_ExecutableReload, }; @@ -243,11 +406,10 @@ main(void) beamformer_init(input); u64 last_time = os_get_timer_counter(); - while (!WindowShouldClose()) { + while (!WindowShouldClose() && !beamformer_should_close(input)) { u64 now = os_get_timer_counter(); - DeferLoop(os_take_lock(&os_w32_context.arena_lock, -1), - os_release_lock(&os_w32_context.arena_lock)) + DeferLoop(take_lock(&os_w32_context.arena_lock, -1), release_lock(&os_w32_context.arena_lock)) { clear_io_queue(input, os_w32_context.arena, now); } @@ -265,6 +427,5 @@ main(void) input->event_count = 0; } - beamformer_invalidate_shared_memory(program_memory.beg); - beamformer_debug_ui_deinit(program_memory.beg); + beamformer_terminate(input); } diff --git a/os_linux.c b/os_linux.c @@ -10,6 +10,7 @@ #include "util.h" 
+#include <errno.h> #include <fcntl.h> #include <linux/futex.h> #include <poll.h> @@ -21,76 +22,25 @@ #include <sys/sysinfo.h> #include <unistd.h> -typedef enum { - OSLinux_FileWatchKindPlatform, - OSLinux_FileWatchKindUser, -} OSLinux_FileWatchKind; - -typedef struct { - OSLinux_FileWatchKind kind; - u64 hash; - u64 update_time; - void * user_context; -} OSLinux_FileWatch; - -typedef struct { - u64 hash; - iptr handle; - s8 name; - - OSLinux_FileWatch * data; - iz count; - iz capacity; -} OSLinux_FileWatchDirectory; -DA_STRUCT(OSLinux_FileWatchDirectory, OSLinux_FileWatchDirectory); - -typedef struct { - Arena arena; - i32 arena_lock; - i32 inotify_handle; - OS_SystemInfo system_info; - - OSLinux_FileWatchDirectoryList file_watch_list; -} OSLinux_Context; -global OSLinux_Context os_linux_context; - -function OS_WRITE_FILE_FN(os_write_file) +function b32 +os_write_file(i32 file, void *data, i64 length) { - while (raw.len > 0) { - iz r = write((i32)file, raw.data, (uz)raw.len); - if (r < 0) return 0; - raw = s8_cut_head(raw, r); + i64 offset = 0; + while (offset < length) { + iz r = write(file, (u8 *)data + offset, length - offset); + if (r < 0 && errno != EINTR) break; + if (r >= 0) offset += r; } - return 1; + return offset == length; } -function void __attribute__((noreturn)) +function no_return void os_exit(i32 code) { _exit(code); unreachable(); } -function void __attribute__((noreturn)) -os_fatal(s8 msg) -{ - os_write_file(STDERR_FILENO, msg); - os_exit(1); - unreachable(); -} - -function iptr -os_error_handle(void) -{ - return STDERR_FILENO; -} - -function s8 -os_path_separator(void) -{ - return s8("/"); -} - function u64 os_get_timer_frequency(void) { @@ -106,43 +56,32 @@ os_get_timer_counter(void) return result; } -function void -os_common_init(void) -{ - os_linux_context.system_info.logical_processor_count = (u32)get_nprocs(); - os_linux_context.system_info.page_size = (u32)getpagesize(); -} - -function iz -os_round_up_to_page_size(iz value) -{ - iz result 
= round_up_to(value, os_linux_context.system_info.page_size); - return result; -} - function OS_ALLOC_ARENA_FN(os_alloc_arena) { Arena result = {0}; - capacity = os_round_up_to_page_size(capacity); - result.beg = mmap(0, (uz)capacity, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - if (result.beg == MAP_FAILED) - os_fatal(s8("os_alloc_arena: couldn't allocate memory\n")); - result.end = result.beg + capacity; - asan_poison_region(result.beg, result.end - result.beg); + capacity = round_up_to(capacity, getpagesize()); + void *memory = mmap(0, (uz)capacity, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (memory != MAP_FAILED) { + result.beg = memory; + result.end = result.beg + capacity; + asan_poison_region(result.beg, result.end - result.beg); + } return result; } -function OS_READ_WHOLE_FILE_FN(os_read_whole_file) +BEAMFORMER_IMPORT OS_READ_ENTIRE_FILE_FN(os_read_entire_file) { - s8 result = s8(""); - + i64 result = 0; struct stat sb; i32 fd = open(file, O_RDONLY); if (fd >= 0 && fstat(fd, &sb) >= 0) { - result = s8_alloc(arena, sb.st_size); - iz rlen = read(fd, result.data, (uz)result.len); - if (rlen != result.len) - result = s8(""); + if (buffer_capacity >= sb.st_size) { + do { + i64 rlen = read(fd, (u8 *)buffer + result, sb.st_size - result); + if (rlen > 0) result += rlen; + } while (result != sb.st_size && errno != EINTR); + if (result != sb.st_size) result = 0; + } } if (fd >= 0) close(fd); @@ -154,7 +93,7 @@ function OS_WRITE_NEW_FILE_FN(os_write_new_file) b32 result = 0; i32 fd = open(fname, O_WRONLY|O_TRUNC|O_CREAT, 0600); if (fd != INVALID_FILE) { - result = os_write_file(fd, raw); + result = os_write_file(fd, raw.data, raw.len); close(fd); } return result; @@ -168,20 +107,6 @@ os_file_exists(char *path) return result; } -function SharedMemoryRegion -os_create_shared_memory_area(Arena *arena, char *name, u32 lock_count, iz requested_capacity) -{ - iz capacity = os_round_up_to_page_size(requested_capacity); - SharedMemoryRegion 
result = {0}; - i32 fd = shm_open(name, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); - if (fd > 0 && ftruncate(fd, capacity) != -1) { - void *new = mmap(0, (uz)capacity, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (new != MAP_FAILED) result.region = new; - } - if (fd > 0) close(fd); - return result; -} - /* NOTE: complete garbage because there is no standarized copyfile() in POSix */ function b32 os_copy_file(char *name, char *new) @@ -209,43 +134,7 @@ os_copy_file(char *name, char *new) return result; } -function OSLinux_FileWatchDirectory * -os_lookup_file_watch_directory(OSLinux_FileWatchDirectoryList *ctx, u64 hash) -{ - OSLinux_FileWatchDirectory *result = 0; - for (iz i = 0; !result && i < ctx->count; i++) - if (ctx->data[i].hash == hash) - result = ctx->data + i; - return result; -} - -function void -os_linux_add_file_watch(s8 path, void *user_context, OSLinux_FileWatchKind kind) -{ - s8 directory = path; - directory.len = s8_scan_backwards(path, '/'); - assert(directory.len > 0); - - OSLinux_FileWatchDirectoryList *fwctx = &os_linux_context.file_watch_list; - - u64 hash = u64_hash_from_s8(directory); - OSLinux_FileWatchDirectory *dir = os_lookup_file_watch_directory(fwctx, hash); - if (!dir) { - assert(path.data[directory.len] == '/'); - dir = da_push(&os_linux_context.arena, fwctx); - dir->hash = hash; - dir->name = push_s8(&os_linux_context.arena, directory); - u32 mask = IN_MOVED_TO|IN_CLOSE_WRITE; - dir->handle = inotify_add_watch(os_linux_context.inotify_handle, (c8 *)dir->name.data, mask); - } - - OSLinux_FileWatch *fw = da_push(&os_linux_context.arena, dir); - fw->user_context = user_context; - fw->hash = u64_hash_from_s8(s8_cut_head(path, dir->name.len + 1)); - fw->kind = kind; -} - -function OS_WAIT_ON_VALUE_FN(os_wait_on_value) +BEAMFORMER_IMPORT OS_WAIT_ON_ADDRESS_FN(os_wait_on_address) { struct timespec *timeout = 0, timeout_value; if (timeout_ms != (u32)-1) { @@ -256,88 +145,10 @@ function OS_WAIT_ON_VALUE_FN(os_wait_on_value) return syscall(SYS_futex, 
value, FUTEX_WAIT, current, timeout, 0, 0) == 0; } -function OS_WAKE_WAITERS_FN(os_wake_waiters) +BEAMFORMER_IMPORT OS_WAKE_ALL_WAITERS_FN(os_wake_all_waiters) { if (sync) { atomic_store_u32(sync, 0); syscall(SYS_futex, sync, FUTEX_WAKE, I32_MAX, 0, 0, 0); } } - -function b32 -os_take_lock(i32 *lock, i32 timeout_ms) -{ - b32 result = 0; - for (;;) { - i32 current = 0; - if (atomic_cas_u32(lock, &current, 1)) - result = 1; - if (result || !timeout_ms || (!os_wait_on_value(lock, current, (u32)timeout_ms) && timeout_ms != -1)) - break; - } - return result; -} - -function void -os_release_lock(i32 *lock) -{ - assert(atomic_load_u32(lock)); - atomic_store_u32(lock, 0); - os_wake_waiters(lock); -} - -function OS_SHARED_MEMORY_LOCK_REGION_FN(os_shared_memory_region_lock) -{ - b32 result = os_take_lock(locks + lock_index, (i32)timeout_ms); - return result; -} - -function OS_SHARED_MEMORY_UNLOCK_REGION_FN(os_shared_memory_region_unlock) -{ - os_release_lock(locks + lock_index); -} - -function OS_SystemInfo * -os_get_system_info(void) -{ - return &os_linux_context.system_info; -} - -function Barrier -os_barrier_alloc(u32 count) -{ - Barrier result = {0}; - DeferLoop(os_take_lock(&os_linux_context.arena_lock, -1), - os_release_lock(&os_linux_context.arena_lock)) - { - pthread_barrier_t *barrier = push_struct(&os_linux_context.arena, pthread_barrier_t); - pthread_barrier_init(barrier, 0, count); - result.value[0] = (u64)barrier; - } - return result; -} - -function void -os_barrier_wait(Barrier barrier) -{ - pthread_barrier_t *b = (pthread_barrier_t *)barrier.value[0]; - if (b) pthread_barrier_wait(b); -} - -function iptr -os_create_thread(iptr user_context, os_thread_entry_point_fn *fn) -{ - pthread_t result; - pthread_create(&result, 0, (void *)fn, (void *)user_context); - return (iptr)result; -} - -function void -os_set_thread_name(iptr thread, s8 name) -{ - char buffer[16]; - u64 length = (u64)CLAMP(name.len, 0, countof(buffer) - 1); - mem_copy(buffer, name.data, length); - 
buffer[length] = 0; - pthread_setname_np((pthread_t)thread, buffer); -} diff --git a/os_win32.c b/os_win32.c @@ -58,14 +58,6 @@ typedef struct { } w32_file_notify_info; typedef struct { - u32 reserved1; - u32 reserved2; - u64 Reserved3[2]; - u32 reserved4; - u32 reserved5; -} w32_synchronization_barrier; - -typedef struct { u16 architecture; u16 _pad1; u32 page_size; @@ -109,9 +101,7 @@ W32(iptr) CreateFileA(c8 *, u32, u32, void *, u32, u32, void *); W32(iptr) CreateFileMappingA(iptr, void *, u32, u32, u32, c8 *); W32(iptr) CreateIoCompletionPort(iptr, iptr, uptr, u32); W32(iptr) CreateSemaphoreA(iptr, i32, i32, c8 *); -W32(iptr) CreateThread(iptr, uz, iptr, iptr, u32, u32 *); W32(b32) DeleteFileA(c8 *); -W32(b32) EnterSynchronizationBarrier(w32_synchronization_barrier *, u32); W32(void) ExitProcess(i32); W32(i32) GetFileAttributesA(c8 *); W32(b32) GetFileInformationByHandle(iptr, void *); @@ -119,67 +109,24 @@ W32(i32) GetLastError(void); W32(b32) GetQueuedCompletionStatus(iptr, u32 *, uptr *, w32_overlapped **, u32); W32(iptr) GetStdHandle(i32); W32(void) GetSystemInfo(w32_system_info *); -W32(b32) InitializeSynchronizationBarrier(w32_synchronization_barrier *, i32, i32); W32(void *) MapViewOfFile(iptr, u32, u32, u32, u64); W32(b32) QueryPerformanceCounter(u64 *); W32(b32) QueryPerformanceFrequency(u64 *); W32(b32) ReadDirectoryChangesW(iptr, u8 *, u32, b32, u32, u32 *, void *, void *); W32(b32) ReadFile(iptr, u8 *, i32, i32 *, void *); W32(b32) ReleaseSemaphore(iptr, i32, i32 *); -W32(i32) SetThreadDescription(iptr, u16 *); W32(u32) WaitForSingleObject(iptr, u32); W32(b32) WaitOnAddress(void *, void *, uz, u32); W32(i32) WakeByAddressAll(void *); -W32(iptr) wglGetProcAddress(c8 *); W32(b32) WriteFile(iptr, u8 *, i32, i32 *, void *); W32(void *) VirtualAlloc(u8 *, iz, u32, u32); -enum {OSW32_FileWatchDirectoryBufferSize = KB(4)}; -typedef enum { - OSW32_FileWatchKindPlatform, - OSW32_FileWatchKindUser, -} OSW32_FileWatchKind; - -typedef struct { - 
OSW32_FileWatchKind kind; - u64 hash; - u64 update_time; - void * user_context; -} OSW32_FileWatch; - -typedef struct { - u64 hash; - iptr handle; - s8 name; - - OSW32_FileWatch *data; - iz count; - iz capacity; - - w32_overlapped overlapped; - w32_io_completion_event event; - - void *buffer; -} OSW32_FileWatchDirectory; -DA_STRUCT(OSW32_FileWatchDirectory, OSW32_FileWatchDirectory); - -typedef struct { - Arena arena; - i32 arena_lock; - iptr error_handle; - iptr io_completion_handle; - u64 timer_frequency; - OS_SystemInfo system_info; - - OSW32_FileWatchDirectoryList file_watch_list; -} OSW32_Context; -global OSW32_Context os_w32_context; - -function OS_WRITE_FILE_FN(os_write_file) +function b32 +os_write_file(iptr file, void *data, i64 length) { i32 wlen = 0; - if (raw.len > 0 && raw.len <= (iz)U32_MAX) WriteFile(file, raw.data, (i32)raw.len, &wlen, 0); - return raw.len == wlen; + if (length > 0 && length <= (i64)U32_MAX) WriteFile(file, data, (i32)length, &wlen, 0); + return length == wlen; } function no_return void @@ -189,30 +136,11 @@ os_exit(i32 code) unreachable(); } -function no_return void -os_fatal(s8 msg) -{ - os_write_file(GetStdHandle(STD_ERROR_HANDLE), msg); - os_exit(1); - unreachable(); -} - -function iptr -os_error_handle(void) -{ - return os_w32_context.error_handle; -} - -function s8 -os_path_separator(void) -{ - return s8("\\"); -} - function u64 os_get_timer_frequency(void) { - u64 result = os_w32_context.timer_frequency; + u64 result; + QueryPerformanceFrequency(&result); return result; } @@ -224,51 +152,40 @@ os_get_timer_counter(void) return result; } -function void -os_common_init(void) +function u32 +os_get_page_size(void) { w32_system_info info = {0}; GetSystemInfo(&info); - - os_w32_context.system_info.page_size = info.page_size; - os_w32_context.system_info.logical_processor_count = info.number_of_processors; - - QueryPerformanceFrequency(&os_w32_context.timer_frequency); -} - -function iz -os_round_up_to_page_size(iz value) -{ - iz 
result = round_up_to(value, os_w32_context.system_info.page_size); + u32 result = info.page_size; return result; } function OS_ALLOC_ARENA_FN(os_alloc_arena) { Arena result = {0}; - capacity = os_round_up_to_page_size(capacity); + capacity = round_up_to(capacity, os_get_page_size()); result.beg = VirtualAlloc(0, capacity, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - if (!result.beg) - os_fatal(s8("os_alloc_arena: couldn't allocate memory\n")); - result.end = result.beg + capacity; - asan_poison_region(result.beg, result.end - result.beg); + if (result.beg) { + result.end = result.beg + capacity; + asan_poison_region(result.beg, result.end - result.beg); + } return result; } -function OS_READ_WHOLE_FILE_FN(os_read_whole_file) +BEAMFORMER_IMPORT OS_READ_ENTIRE_FILE_FN(os_read_entire_file) { - s8 result = s8(""); - + i64 result = 0; w32_file_info fileinfo; - iptr h = CreateFileA(file, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); + iptr h = CreateFileA((c8 *)file, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); if (h >= 0 && GetFileInformationByHandle(h, &fileinfo)) { iz filesize = (iz)fileinfo.nFileSizeHigh << 32; filesize |= (iz)fileinfo.nFileSizeLow; - if (filesize <= (iz)U32_MAX) { - result = s8_alloc(arena, filesize); + if (buffer_capacity >= filesize) { + result = filesize; i32 rlen; - if (!ReadFile(h, result.data, (i32)result.len, &rlen, 0) || rlen != result.len) - result = s8(""); + if (!ReadFile(h, buffer, (i32)filesize, &rlen, 0) || rlen != filesize) + result = 0; } } if (h >= 0) CloseHandle(h); @@ -282,11 +199,10 @@ function OS_WRITE_NEW_FILE_FN(os_write_new_file) iptr h = CreateFileA(fname, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0); if (h >= 0) { while (raw.len > 0) { - s8 chunk = raw; - chunk.len = MIN(chunk.len, (iz)GB(2)); - result = os_write_file(h, chunk); + i64 length = MIN(raw.len, (iz)GB(2)); + result = os_write_file(h, raw.data, length); if (!result) break; - raw = s8_cut_head(raw, chunk.len); + raw = s8_cut_head(raw, length); } CloseHandle(h); } @@ -300,102 
+216,18 @@ os_file_exists(char *path) return result; } -function SharedMemoryRegion -os_create_shared_memory_area(Arena *arena, char *name, u32 lock_count, iz requested_capacity) -{ - iz capacity = os_round_up_to_page_size(requested_capacity); - assert(capacity <= (iz)U32_MAX); - SharedMemoryRegion result = {0}; - iptr h = CreateFileMappingA(-1, 0, PAGE_READWRITE, 0, (u32)capacity, name); - if (h != INVALID_FILE) { - void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, (u32)capacity); - if (new) { - w32_shared_memory_context *ctx = push_struct(arena, typeof(*ctx)); - ctx->semaphores = push_array(arena, typeof(*ctx->semaphores), lock_count); - ctx->reserved_count = lock_count; - result.os_context = (iptr)ctx; - result.region = new; - - Stream sb = arena_stream(*arena); - stream_append_s8s(&sb, c_str_to_s8(name), s8("_lock_")); - for (u32 i = 0; i < lock_count; i++) { - Stream lb = sb; - stream_append_u64(&lb, i); - stream_append_byte(&lb, 0); - ctx->semaphores[i] = CreateSemaphoreA(0, 1, 1, (c8 *)lb.data); - if (ctx->semaphores[i] == INVALID_FILE) - os_fatal(s8("os_create_shared_memory_area: failed to create semaphore\n")); - - /* NOTE(rnp): hacky garbage because CreateSemaphore will just open an existing - * semaphore without any indication. 
Sometimes the other side of the shared memory - * will provide incorrect parameters or will otherwise fail and its faster to - * restart this program than to get that application to release the semaphores */ - /* TODO(rnp): figure out something more robust */ - ReleaseSemaphore(ctx->semaphores[i], 1, 0); - } - } - } - return result; -} - function b32 os_copy_file(char *name, char *new) { return CopyFileA(name, new, 0); } -function OSW32_FileWatchDirectory * -os_lookup_file_watch_directory(OSW32_FileWatchDirectoryList *ctx, u64 hash) -{ - OSW32_FileWatchDirectory *result = 0; - for (iz i = 0; !result && i < ctx->count; i++) - if (ctx->data[i].hash == hash) - result = ctx->data + i; - return result; -} - -function void -os_w32_add_file_watch(s8 path, void *user_context, OSW32_FileWatchKind kind) -{ - s8 directory = path; - directory.len = s8_scan_backwards(path, '\\'); - assert(directory.len > 0); - - OSW32_FileWatchDirectoryList *fwctx = &os_w32_context.file_watch_list; - - u64 hash = u64_hash_from_s8(directory); - OSW32_FileWatchDirectory *dir = os_lookup_file_watch_directory(fwctx, hash); - if (!dir) { - assert(path.data[directory.len] == '\\'); - - dir = da_push(&os_w32_context.arena, fwctx); - dir->hash = hash; - dir->name = push_s8(&os_w32_context.arena, directory); - dir->handle = CreateFileA((c8 *)dir->name.data, GENERIC_READ, FILE_SHARE_READ, 0, - OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS|FILE_FLAG_OVERLAPPED, 0); - - dir->event.tag = W32IOEvent_FileWatch; - dir->event.context = (iptr)dir; - CreateIoCompletionPort(dir->handle, os_w32_context.io_completion_handle, (uptr)&dir->event, 0); - - dir->buffer = arena_alloc(&os_w32_context.arena, .size = OSW32_FileWatchDirectoryBufferSize); - ReadDirectoryChangesW(dir->handle, dir->buffer, OSW32_FileWatchDirectoryBufferSize, 0, - FILE_NOTIFY_CHANGE_LAST_WRITE, 0, &dir->overlapped, 0); - } - - OSW32_FileWatch *fw = da_push(&os_w32_context.arena, dir); - fw->user_context = user_context; - fw->hash = 
u64_hash_from_s8(s8_cut_head(path, dir->name.len + 1)); - fw->kind = kind; -} - -function OS_WAIT_ON_VALUE_FN(os_wait_on_value) +BEAMFORMER_IMPORT OS_WAIT_ON_ADDRESS_FN(os_wait_on_address) { return WaitOnAddress(value, &current, sizeof(*value), timeout_ms); } -function OS_WAKE_WAITERS_FN(os_wake_waiters) +BEAMFORMER_IMPORT OS_WAKE_ALL_WAITERS_FN(os_wake_all_waiters) { if (sync) { atomic_store_u32(sync, 0); @@ -403,84 +235,22 @@ function OS_WAKE_WAITERS_FN(os_wake_waiters) } } -function b32 -os_take_lock(i32 *lock, i32 timeout_ms) -{ - b32 result = 0; - for (;;) { - i32 current = 0; - if (atomic_cas_u32(lock, &current, 1)) - result = 1; - if (result || !timeout_ms || (!os_wait_on_value(lock, current, (u32)timeout_ms) && timeout_ms != -1)) - break; - } - return result; -} - -function void -os_release_lock(i32 *lock) +BEAMFORMER_IMPORT OSW32Semaphore +os_w32_create_semaphore(const char *name, i32 initial_count, i32 maximum_count) { - assert(atomic_load_u32(lock)); - atomic_store_u32(lock, 0); - os_wake_waiters(lock); -} - -function OS_SHARED_MEMORY_LOCK_REGION_FN(os_shared_memory_region_lock) -{ - w32_shared_memory_context *ctx = (typeof(ctx))sm->os_context; - b32 result = !WaitForSingleObject(ctx->semaphores[lock_index], timeout_ms); - if (result) atomic_store_u32(locks + lock_index, 1); - return result; -} - -function OS_SHARED_MEMORY_UNLOCK_REGION_FN(os_shared_memory_region_unlock) -{ - w32_shared_memory_context *ctx = (typeof(ctx))sm->os_context; - os_release_lock(locks + lock_index); - ReleaseSemaphore(ctx->semaphores[lock_index], 1, 0); -} - -function OS_SystemInfo * -os_get_system_info(void) -{ - return &os_w32_context.system_info; -} - -function Barrier -os_barrier_alloc(u32 count) -{ - Barrier result = {0}; - DeferLoop(os_take_lock(&os_w32_context.arena_lock, -1), - os_release_lock(&os_w32_context.arena_lock)) - { - w32_synchronization_barrier *barrier = push_struct(&os_w32_context.arena, w32_synchronization_barrier); - 
InitializeSynchronizationBarrier(barrier, (i32)count, -1); - result.value[0] = (u64)barrier; - } + OSW32Semaphore result = {(u64)CreateSemaphoreA(0, initial_count, maximum_count, (c8 *)name)}; return result; } -function void -os_barrier_wait(Barrier barrier) +BEAMFORMER_IMPORT u32 +os_w32_semaphore_wait(OSW32Semaphore handle, u32 timeout_ms) { - w32_synchronization_barrier *b = (w32_synchronization_barrier *)barrier.value[0]; - if (b) EnterSynchronizationBarrier(b, 0); -} - -function iptr -os_create_thread(iptr user_context, os_thread_entry_point_fn *fn) -{ - iptr result = CreateThread(0, 0, (iptr)fn, user_context, 0, 0); + b32 result = !WaitForSingleObject(handle.value[0], timeout_ms); return result; } -function void -os_set_thread_name(iptr thread, s8 name) +BEAMFORMER_IMPORT void +os_w32_semaphore_release(OSW32Semaphore handle, i32 count) { - DeferLoop(os_take_lock(&os_w32_context.arena_lock, -1), - os_release_lock(&os_w32_context.arena_lock)) - { - Arena arena = os_w32_context.arena; - SetThreadDescription(thread, s8_to_s16(&arena, name).data); - } + ReleaseSemaphore(handle.value[0], count, 0); } diff --git a/tests/throughput.c b/tests/throughput.c @@ -74,13 +74,6 @@ die_(char *function_name, char *format, ...) 
#include <sys/stat.h> #include <unistd.h> -function f64 -os_get_time(void) -{ - f64 result = (f64)os_get_timer_counter() / (f64)os_get_timer_frequency(); - return result; -} - function s8 os_read_file_simp(char *fname) { @@ -109,13 +102,6 @@ os_read_file_simp(char *fname) #elif OS_WINDOWS -function f64 -os_get_time(void) -{ - f64 result = (f64)os_get_timer_counter() / (f64)os_w32_context.timer_frequency; - return result; -} - function s8 os_read_file_simp(char *fname) { @@ -416,11 +402,12 @@ execute_study(s8 study, Arena arena, Stream path, Options *options) u32 frame = 0; f32 times[32] = {0}; f32 data_size = (f32)(bp.raw_data_dimensions.E[0] * bp.raw_data_dimensions.E[1] * sizeof(*data)); - f64 start = os_get_time(); + u64 start = os_get_timer_counter(); + f64 frequency = os_get_timer_frequency(); for (;!g_should_exit;) { if (send_frame(data, &bp)) { - f64 now = os_get_time(); - f32 delta = (f32)(now - start); + u64 now = os_get_timer_counter(); + f64 delta = (now - start) / frequency; start = now; if ((frame % 16) == 0) { @@ -464,8 +451,6 @@ main(i32 argc, char *argv[]) if (!BETWEEN(options.remaining_count, 1, 2)) usage(argv[0]); - os_common_init(); - signal(SIGINT, sigint); Arena arena = os_alloc_arena(KB(8)); diff --git a/threads.c b/threads.c @@ -22,11 +22,11 @@ thread_context_barrier_wait(void *broadcast, u64 broadcast_size, u64 broadcast_l if (broadcast && lane_index() == broadcast_lane_index) mem_copy(ctx->lane_context.broadcast_memory, broadcast, broadcast_size_clamped); - os_barrier_wait(ctx->lane_context.barrier); + os_barrier_enter(ctx->lane_context.barrier); if (broadcast && lane_index() != broadcast_lane_index) mem_copy(broadcast, ctx->lane_context.broadcast_memory, broadcast_size_clamped); if (broadcast) - os_barrier_wait(ctx->lane_context.barrier); + os_barrier_enter(ctx->lane_context.barrier); } diff --git a/ui.c b/ui.c @@ -432,8 +432,8 @@ struct BeamformerUI { FrameViewRenderContext *frame_view_render_context; - SharedMemoryRegion shared_memory; - 
BeamformerCtx *beamformer_context; + BeamformerSharedMemory * shared_memory; + BeamformerCtx * beamformer_context; }; typedef enum { @@ -1413,8 +1413,7 @@ add_compute_stats_view(BeamformerUI *ui, Variable *parent, Arena *arena, Beamfor function Variable * add_live_controls_view(BeamformerUI *ui, Variable *parent, Arena *arena) { - BeamformerSharedMemory *sm = ui->shared_memory.region; - BeamformerLiveImagingParameters *lip = &sm->live_imaging_parameters; + BeamformerLiveImagingParameters *lip = &ui->shared_memory->live_imaging_parameters; /* TODO(rnp): this can be closable once we have a way of opening new views */ Variable *result = add_ui_view(ui, parent, &ui->arena, s8("Live Controls"), 0, 1, 0); result->view.child = add_variable(ui, result, &ui->arena, s8(""), 0, @@ -1559,8 +1558,7 @@ x_plane_position(BeamformerUI *ui) function v3 offset_x_plane_position(BeamformerUI *ui, BeamformerFrameView *view, BeamformerViewPlaneTag tag) { - BeamformerSharedMemory *sm = ui->shared_memory.region; - BeamformerLiveImagingParameters *li = &sm->live_imaging_parameters; + BeamformerLiveImagingParameters *li = &ui->shared_memory->live_imaging_parameters; m4 x_rotation = m4_rotation_about_y(x_plane_rotation_for_view_plane(view, tag)); v3 Z = x_rotation.c[2].xyz; v3 offset = v3_scale(Z, li->image_plane_offsets[tag]); @@ -2835,8 +2833,7 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v function v2 draw_live_controls_view(BeamformerUI *ui, Variable *var, Rect r, v2 mouse, Arena arena) { - BeamformerSharedMemory *sm = ui->shared_memory.region; - BeamformerLiveImagingParameters *lip = &sm->live_imaging_parameters; + BeamformerLiveImagingParameters *lip = &ui->shared_memory->live_imaging_parameters; BeamformerLiveControlsView *lv = var->generic; TextSpec text_spec = {.font = &ui->font, .colour = FG_COLOUR, .flags = TF_LIMITED}; @@ -3116,8 +3113,7 @@ draw_ui_view(BeamformerUI *ui, Variable *ui_view, Rect r, v2 mouse, TextSpec tex case 
VT_LIVE_CONTROLS_VIEW:{ if (view->rect.size.h - r.size.h < 0) r.pos.y += 0.5f * (r.size.h - view->rect.size.h); - BeamformerSharedMemory *sm = ui->shared_memory.region; - if (sm->live_imaging_parameters.active) + if (ui->shared_memory->live_imaging_parameters.active) size = draw_live_controls_view(ui, var, r, mouse, ui->arena); }break; InvalidDefaultCase; @@ -3700,9 +3696,8 @@ function void ui_live_control_update(BeamformerUI *ui, Variable *controls) { assert(controls->type == VT_LIVE_CONTROLS_VIEW); - BeamformerSharedMemory *sm = ui->shared_memory.region; BeamformerLiveControlsView *lv = controls->generic; - atomic_or_u32(&sm->live_imaging_dirty_flags, lv->active_field_flag); + atomic_or_u32(&ui->shared_memory->live_imaging_dirty_flags, lv->active_field_flag); } function void @@ -3727,8 +3722,8 @@ ui_end_interact(BeamformerUI *ui, v2 mouse) f32 delta = v3_dot(Z, v3_sub(xp->end_point, xp->start_point)); xp->start_point = xp->end_point; - BeamformerSharedMemory *sm = ui->shared_memory.region; - BeamformerLiveImagingParameters *li = &sm->live_imaging_parameters; + BeamformerSharedMemory * sm = ui->shared_memory; + BeamformerLiveImagingParameters * li = &sm->live_imaging_parameters; li->image_plane_offsets[plane] += delta; atomic_or_u32(&sm->live_imaging_dirty_flags, BeamformerLiveImagingDirtyFlags_ImagePlaneOffsets); }break; @@ -3995,12 +3990,12 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformerFrame *frame_to_dr u32 selected_block = ui->selected_parameter_block % BeamformerMaxParameterBlockSlots; u32 selected_mask = 1 << selected_block; if (ctx->ui_dirty_parameter_blocks & selected_mask) { - BeamformerParameterBlock *pb = beamformer_parameter_block_lock(&ctx->shared_memory, selected_block, 0); + BeamformerParameterBlock *pb = beamformer_parameter_block_lock(ui->shared_memory, selected_block, 0); if (pb) { mem_copy(&ui->params, &pb->parameters_ui, sizeof(ui->params)); ui->flush_params = 0; atomic_and_u32(&ctx->ui_dirty_parameter_blocks, 
~selected_mask); - beamformer_parameter_block_unlock(&ctx->shared_memory, selected_block); + beamformer_parameter_block_unlock(ui->shared_memory, selected_block); } } @@ -4012,13 +4007,13 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformerFrame *frame_to_dr if (ui->flush_params) { validate_ui_parameters(&ui->params); if (ctx->latest_frame) { - BeamformerParameterBlock *pb = beamformer_parameter_block_lock(&ctx->shared_memory, selected_block, 0); + BeamformerParameterBlock *pb = beamformer_parameter_block_lock(ui->shared_memory, selected_block, 0); if (pb) { ui->flush_params = 0; mem_copy(&pb->parameters_ui, &ui->params, sizeof(ui->params)); - mark_parameter_block_region_dirty(ctx->shared_memory.region, selected_block, + mark_parameter_block_region_dirty(ui->shared_memory, selected_block, BeamformerParameterBlockRegion_Parameters); - beamformer_parameter_block_unlock(&ctx->shared_memory, selected_block); + beamformer_parameter_block_unlock(ui->shared_memory, selected_block); beamformer_queue_compute(ctx, frame_to_draw, selected_block); } diff --git a/util.h b/util.h @@ -4,14 +4,6 @@ #include "compiler.h" -/* NOTE: glibc devs are actually buffoons who never write any real code - * the following headers include a bunch of other headers which need this crap defined first */ -#if OS_LINUX - #ifndef _GNU_SOURCE - #define _GNU_SOURCE - #endif -#endif - #include <stddef.h> #include <stdint.h> @@ -313,35 +305,27 @@ typedef struct { b32 errors; } Stream; -#define INVALID_FILE (-1) -#define InvalidHandle (Handle){(u64)(-1)} -#define ValidHandle(h) ((h).value[0] != InvalidHandle.value[0]) - -typedef struct OS OS; +#define INVALID_FILE (-1) -typedef struct { - Arena arena; - iptr handle; - iptr window_handle; - iptr gl_context; - iptr user_context; - i32 sync_variable; - b32 awake; -} GLWorkerThreadContext; +#ifndef OSInvalidHandleValue + #define OSInvalidHandleValue ((u64)-1) + typedef struct { u64 value[1]; } OSBarrier; + typedef struct { u64 value[1]; } 
OSHandle; + typedef struct { u64 value[1]; } OSLibrary; + typedef struct { u64 value[1]; } OSThread; + typedef struct { u64 value[1]; } OSW32Semaphore; +#endif -typedef struct { - void *region; - iptr os_context; -} SharedMemoryRegion; +#define ValidHandle(h) ((h).value[0] != OSInvalidHandleValue) +#define InvalidHandle(h) ((h).value[0] == OSInvalidHandleValue) -typedef struct { u64 value[1]; } Barrier; -typedef struct { u64 value[1]; } Handle; +typedef struct OS OS; typedef struct { - u64 index; - u64 count; - Barrier barrier; - u64 *broadcast_memory; + u64 index; + u64 count; + OSBarrier barrier; + u64 * broadcast_memory; } LaneContext; typedef struct { @@ -351,36 +335,15 @@ typedef struct { LaneContext lane_context; } ThreadContext; -#define OS_ALLOC_ARENA_FN(name) Arena name(iz capacity) -typedef OS_ALLOC_ARENA_FN(os_alloc_arena_fn); - -#define OS_WAKE_WORKER_FN(name) void name(GLWorkerThreadContext *ctx) -typedef OS_WAKE_WORKER_FN(os_wake_worker_fn); - -#define OS_READ_WHOLE_FILE_FN(name) s8 name(Arena *arena, char *file) -typedef OS_READ_WHOLE_FILE_FN(os_read_whole_file_fn); - -#define OS_WAIT_ON_VALUE_FN(name) b32 name(i32 *value, i32 current, u32 timeout_ms) -typedef OS_WAIT_ON_VALUE_FN(os_wait_on_value_fn); - -#define OS_WAKE_WAITERS_FN(name) void name(i32 *sync) -typedef OS_WAKE_WAITERS_FN(os_wake_waiters_fn); +#define OS_ALLOC_ARENA_FN(name) Arena name(iz capacity) +#define OS_READ_ENTIRE_FILE_FN(name) i64 name(const char *file, void *buffer, i64 buffer_capacity) +#define OS_WAIT_ON_ADDRESS_FN(name) b32 name(i32 *value, i32 current, u32 timeout_ms) +#define OS_WAKE_ALL_WAITERS_FN(name) void name(i32 *sync) +#define OS_THREAD_ENTRY_POINT_FN(name) u64 name(void *user_context) #define OS_WRITE_NEW_FILE_FN(name) b32 name(char *fname, s8 raw) typedef OS_WRITE_NEW_FILE_FN(os_write_new_file_fn); -#define OS_WRITE_FILE_FN(name) b32 name(iptr file, s8 raw) -typedef OS_WRITE_FILE_FN(os_write_file_fn); - -#define OS_THREAD_ENTRY_POINT_FN(name) iptr name(iptr _ctx) 
-typedef OS_THREAD_ENTRY_POINT_FN(os_thread_entry_point_fn); - -#define OS_SHARED_MEMORY_LOCK_REGION_FN(name) b32 name(SharedMemoryRegion *sm, i32 *locks, i32 lock_index, u32 timeout_ms) -typedef OS_SHARED_MEMORY_LOCK_REGION_FN(os_shared_memory_region_lock_fn); - -#define OS_SHARED_MEMORY_UNLOCK_REGION_FN(name) void name(SharedMemoryRegion *sm, i32 *locks, i32 lock_index) -typedef OS_SHARED_MEMORY_UNLOCK_REGION_FN(os_shared_memory_region_unlock_fn); - #define RENDERDOC_GET_API_FN(name) b32 name(u32 version, void **out_api) typedef RENDERDOC_GET_API_FN(renderdoc_get_api_fn); @@ -395,11 +358,6 @@ typedef alignas(16) u8 RenderDocAPI[216]; #define RENDERDOC_START_FRAME_CAPTURE(a) (renderdoc_start_frame_capture_fn *)RENDERDOC_API_FN_ADDR(a, 152) #define RENDERDOC_END_FRAME_CAPTURE(a) (renderdoc_end_frame_capture_fn *) RENDERDOC_API_FN_ADDR(a, 168) -typedef struct { - u32 logical_processor_count; - u32 page_size; -} OS_SystemInfo; - #define LABEL_GL_OBJECT(type, id, s) {s8 _s = (s); glObjectLabel(type, id, (i32)_s.len, (c8 *)_s.data);} #include "util.c" diff --git a/util_gl.c b/util_gl.c @@ -18,7 +18,7 @@ compile_shader(Arena a, u32 type, s8 shader, s8 name) glGetShaderInfoLog(sid, len, &out_len, (char *)(buf.data + buf.widx)); stream_commit(&buf, out_len); glDeleteShader(sid); - os_write_file(os_error_handle(), stream_to_s8(&buf)); + os_console_log(buf.data, buf.widx); sid = 0; } @@ -42,7 +42,7 @@ link_program(Arena a, u32 *shader_ids, i32 shader_id_count) glGetProgramInfoLog(result, buf.cap - buf.widx, &len, (c8 *)(buf.data + buf.widx)); stream_reset(&buf, len); stream_append_byte(&buf, '\n'); - os_write_file(os_error_handle(), stream_to_s8(&buf)); + os_console_log(buf.data, buf.widx); glDeleteProgram(result); result = 0; } diff --git a/util_os.c b/util_os.c @@ -0,0 +1,26 @@ +/* See LICENSE for license details. 
*/ + +// NOTE(rnp): functions which require platform layer support but +// otherwise share implementation + +function b32 +take_lock(i32 *lock, i32 timeout_ms) +{ + b32 result = 0; + for (;;) { + i32 current = 0; + if (atomic_cas_u32(lock, &current, 1)) + result = 1; + if (result || !timeout_ms || (!os_wait_on_address(lock, current, (u32)timeout_ms) && timeout_ms != -1)) + break; + } + return result; +} + +function void +release_lock(i32 *lock) +{ + assert(atomic_load_u32(lock)); + atomic_store_u32(lock, 0); + os_wake_all_waiters(lock); +}