ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 8e2d399de3ad4d383c731cb6575a2c993a9cdb9e
Parent: 19f298ce4ec4b79fcacb5693c4e3eee989aff9cd
Author: Randy Palamar
Date:   Sun,  6 Apr 2025 08:35:15 -0600

core/lib: upload rf data through the shared memory area

Jank pipe begone!

Diffstat:
M beamformer.c | 192 +++++++++++++++++++++++++++++--------------------------------------------------
M beamformer.h | 96 ++++++++++++++++++-------------------------------------------------------------
A beamformer_work_queue.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
A beamformer_work_queue.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M build.sh | 7 ++++---
M helpers/ogl_beamformer_lib.c | 203 +++++++++++++++++++++++++++++--------------------------------------------------
M helpers/ogl_beamformer_lib.h | 19 +++++--------------
M main_linux.c | 20 ++++----------------
M main_w32.c | 37 -------------------------------------
M os_linux.c | 28 ++++++++--------------------
M os_win32.c | 28 ++++------------------------
M static.c | 15 +++++++--------
M ui.c | 11 ++++++-----
M util.c | 1 -
M util.h | 6 +++---
15 files changed, 341 insertions(+), 457 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -1,5 +1,6 @@ /* See LICENSE for license details. */ #include "beamformer.h" +#include "beamformer_work_queue.c" static f32 dt_for_frame; static u32 cycle_t; @@ -105,10 +106,10 @@ static void alloc_shader_storage(BeamformerCtx *ctx, Arena a) { ComputeShaderCtx *cs = &ctx->csctx; - BeamformerParameters *bp = &ctx->params->raw; + BeamformerParameters *bp = &ctx->shared_memory->raw; uv4 dec_data_dim = bp->dec_data_dim; - u32 rf_raw_size = ctx->params->raw_data_size; + u32 rf_raw_size = ctx->shared_memory->raw_data_size; cs->dec_data_dim = dec_data_dim; cs->rf_raw_size = rf_raw_size; @@ -176,56 +177,6 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a) LABEL_GL_OBJECT(GL_TEXTURE, cs->hadamard_texture, s8("Hadamard_Matrix")); } -static BeamformWork * -beamform_work_queue_pop(BeamformWorkQueue *q) -{ - BeamformWork *result = 0; - - static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2"); - u64 val = atomic_load(&q->queue); - u64 mask = ARRAY_COUNT(q->work_items) - 1; - u32 widx = val & mask; - u32 ridx = val >> 32 & mask; - - if (ridx != widx) - result = q->work_items + ridx; - - return result; -} - -static void -beamform_work_queue_pop_commit(BeamformWorkQueue *q) -{ - atomic_add(&q->queue, 0x100000000ULL); -} - -DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push) -{ - BeamformWork *result = 0; - - static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2"); - u64 val = atomic_load(&q->queue); - u64 mask = ARRAY_COUNT(q->work_items) - 1; - u32 widx = val & mask; - u32 ridx = val >> 32 & mask; - u32 next = (widx + 1) & mask; - - if (val & 0x80000000) - atomic_and(&q->queue, ~0x80000000); - - if (next != ridx) { - result = q->work_items + widx; - zero_struct(result); - } - - return result; -} - -DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit) -{ - atomic_add(&q->queue, 1); -} - static b32 
fill_frame_compute_work(BeamformerCtx *ctx, BeamformWork *work) { @@ -434,7 +385,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame, ASSERT(frame >= ctx->beamform_frames); ASSERT(frame < ctx->beamform_frames + ARRAY_COUNT(ctx->beamform_frames)); u32 base_index = (u32)(frame - ctx->beamform_frames); - u32 to_average = ctx->params->raw.output_points.w; + u32 to_average = ctx->shared_memory->raw.output_points.w; u32 frame_count = 0; u32 *in_textures = alloc(&arena, u32, MAX_BEAMFORMED_SAVED_FRAMES); ComputeFrameIterator cfi = compute_frame_iterator(ctx, 1 + base_index - to_average, @@ -594,15 +545,13 @@ reload_compute_shader(BeamformerCtx *ctx, s8 path, s8 extra, ComputeShaderReload return result; } -DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute) +static void +complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_context, iz barrier_offset) { - BeamformerCtx *ctx = (BeamformerCtx *)user_context; - BeamformWorkQueue *q = ctx->beamform_work_queue; - BeamformWork *work = beamform_work_queue_pop(q); - ComputeShaderCtx *cs = &ctx->csctx; - - BeamformerParameters *bp = &ctx->params->raw; + ComputeShaderCtx *cs = &ctx->csctx; + BeamformerParameters *bp = &ctx->shared_memory->raw; + BeamformWork *work = beamform_work_queue_pop(q); while (work) { b32 can_commit = 1; switch (work->type) { @@ -630,55 +579,47 @@ DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute) #undef X } } break; - case BW_LOAD_RF_DATA: { - if (cs->rf_raw_size != ctx->params->raw_data_size || + case BW_UPLOAD_RF_DATA: { + ASSERT(!atomic_load(&ctx->shared_memory->raw_data_sync)); + + if (cs->rf_raw_size != ctx->shared_memory->raw_data_size || !uv4_equal(cs->dec_data_dim, bp->dec_data_dim)) { alloc_shader_storage(ctx, arena); } - void *rf_data_buf = cs->raw_data_arena.beg; - iz rlen = ctx->os.read_file(work->file_handle, rf_data_buf, cs->rf_raw_size); - if (rlen != cs->rf_raw_size) { - 
stream_append_s8(&ctx->error_stream, s8("Partial Read Occurred: ")); - stream_append_i64(&ctx->error_stream, rlen); - stream_append_byte(&ctx->error_stream, '/'); - stream_append_i64(&ctx->error_stream, cs->rf_raw_size); - stream_append_byte(&ctx->error_stream, '\n'); - ctx->os.write_file(ctx->os.stderr, stream_to_s8(&ctx->error_stream)); - ctx->error_stream.widx = 0; - } else { - switch (ctx->gl.vendor_id) { - case GL_VENDOR_AMD: - case GL_VENDOR_ARM: - case GL_VENDOR_INTEL: - break; - case GL_VENDOR_NVIDIA: - glNamedBufferSubData(cs->raw_data_ssbo, 0, rlen, rf_data_buf); - } + void *raw_data = (u8 *)ctx->shared_memory + BEAMFORMER_RF_DATA_OFF; + switch (ctx->gl.vendor_id) { + case GL_VENDOR_AMD: + case GL_VENDOR_ARM: + case GL_VENDOR_INTEL: + mem_copy(cs->raw_data_arena.beg, raw_data, cs->rf_raw_size); + break; + case GL_VENDOR_NVIDIA: + glNamedBufferSubData(cs->raw_data_ssbo, 0, cs->rf_raw_size, raw_data); + break; } - ctx->ready_for_rf = 1; } break; case BW_COMPUTE: { atomic_store(&cs->processing_compute, 1); start_renderdoc_capture(gl_context); BeamformComputeFrame *frame = work->frame; - if (ctx->params->upload) { - glNamedBufferSubData(cs->shared_ubo, 0, sizeof(ctx->params->raw), - &ctx->params->raw); - ctx->params->upload = 0; + if (ctx->shared_memory->upload) { + glNamedBufferSubData(cs->shared_ubo, 0, sizeof(ctx->shared_memory->raw), + &ctx->shared_memory->raw); + ctx->shared_memory->upload = 0; } if (cs->programs[CS_DAS]) glProgramUniform1ui(cs->programs[CS_DAS], cs->cycle_t_id, cycle_t++); - uv3 try_dim = make_valid_test_dim(ctx->params->raw.output_points.xyz); + uv3 try_dim = make_valid_test_dim(ctx->shared_memory->raw.output_points.xyz); if (!uv3_equal(try_dim, frame->frame.dim)) alloc_beamform_frame(&ctx->gl, &frame->frame, &frame->stats, try_dim, s8("Beamformed_Data"), arena); - if (ctx->params->raw.output_points.w > 1) { + if (ctx->shared_memory->raw.output_points.w > 1) { if (!uv3_equal(try_dim, ctx->averaged_frames[0].frame.dim)) { 
alloc_beamform_frame(&ctx->gl, &ctx->averaged_frames[0].frame, &ctx->averaged_frames[0].stats, @@ -690,14 +631,14 @@ DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute) } frame->in_flight = 1; - frame->frame.min_coordinate = ctx->params->raw.output_min_coordinate; - frame->frame.max_coordinate = ctx->params->raw.output_max_coordinate; - frame->frame.das_shader_id = ctx->params->raw.das_shader_id; - frame->frame.compound_count = ctx->params->raw.dec_data_dim.z; + frame->frame.min_coordinate = ctx->shared_memory->raw.output_min_coordinate; + frame->frame.max_coordinate = ctx->shared_memory->raw.output_max_coordinate; + frame->frame.das_shader_id = ctx->shared_memory->raw.das_shader_id; + frame->frame.compound_count = ctx->shared_memory->raw.dec_data_dim.z; b32 did_sum_shader = 0; - u32 stage_count = ctx->params->compute_stages_count; - ComputeShaderID *stages = ctx->params->compute_stages; + u32 stage_count = ctx->shared_memory->compute_stages_count; + ComputeShaderID *stages = ctx->shared_memory->compute_stages; for (u32 i = 0; i < stage_count; i++) { did_sum_shader |= stages[i] == CS_SUM; frame->stats.timer_active[stages[i]] = 1; @@ -742,15 +683,27 @@ DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute) INVALID_CODE_PATH; } } break; + default: INVALID_CODE_PATH; break; } if (can_commit) { + if (work->completion_barrier) { + i32 *value = (i32 *)(barrier_offset + work->completion_barrier); + ctx->os.wake_waiters(value); + } beamform_work_queue_pop_commit(q); work = beamform_work_queue_pop(q); } } } +DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute) +{ + BeamformerCtx *ctx = (BeamformerCtx *)user_context; + complete_queue(ctx, &ctx->shared_memory->external_work_queue, arena, gl_context, (iz)ctx->shared_memory); + complete_queue(ctx, ctx->beamform_work_queue, arena, gl_context, 0); +} + #include "ui.c" DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) @@ -768,56 +721,51 @@ DEBUG_EXPORT 
BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) DEBUG_DECL(end_frame_capture = ctx->os.end_frame_capture); } - if (ctx->start_compute && !input->pipe_data_available) { - if (ctx->beamform_frames[ctx->display_frame_index].ready_to_present) { - BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); - if (fill_frame_compute_work(ctx, work)) { - beamform_work_queue_push_commit(ctx->beamform_work_queue); - ctx->os.wake_waiters(&ctx->os.compute_worker.sync_variable); - ctx->start_compute = 0; - } - } - } - - BeamformerParameters *bp = &ctx->params->raw; - if (ctx->ready_for_rf && input->pipe_data_available) { + BeamformerParameters *bp = &ctx->shared_memory->raw; + if (ctx->shared_memory->start_compute) { + ctx->shared_memory->start_compute = 0; BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); if (work) { - ctx->start_compute = 1; - ctx->ready_for_rf = 0; - - work->type = BW_LOAD_RF_DATA; - work->file_handle = input->pipe_handle; - beamform_work_queue_push_commit(ctx->beamform_work_queue); - - BeamformWork *compute = beamform_work_queue_push(ctx->beamform_work_queue); - if (fill_frame_compute_work(ctx, compute)) + if (fill_frame_compute_work(ctx, work)) beamform_work_queue_push_commit(ctx->beamform_work_queue); - if (compute && ctx->params->export_next_frame) { + if (ctx->shared_memory->export_next_frame) { BeamformWork *export = beamform_work_queue_push(ctx->beamform_work_queue); if (export) { /* TODO: we don't really want the beamformer opening/closing files */ - iptr f = ctx->os.open_for_write(ctx->params->export_pipe_name); + iptr f = ctx->os.open_for_write(ctx->shared_memory->export_pipe_name); export->type = BW_SAVE_FRAME; export->output_frame_ctx.file_handle = f; - if (ctx->params->raw.output_points.w > 1) { + if (ctx->shared_memory->raw.output_points.w > 1) { u32 a_index = !(ctx->averaged_frame_index % ARRAY_COUNT(ctx->averaged_frames)); BeamformComputeFrame *aframe = ctx->averaged_frames + a_index; 
export->output_frame_ctx.frame = aframe; } else { - export->output_frame_ctx.frame = compute->frame; + export->output_frame_ctx.frame = work->frame; } beamform_work_queue_push_commit(ctx->beamform_work_queue); } - ctx->params->export_next_frame = 0; + ctx->shared_memory->export_next_frame = 0; } - if (ctx->params->upload) { + if (ctx->shared_memory->upload) { /* TODO(rnp): clean this up */ ctx->ui_read_params = 1; } + + ctx->os.wake_waiters(&ctx->os.compute_worker.sync_variable); + } + } + + if (ctx->start_compute) { + if (ctx->beamform_frames[ctx->display_frame_index].ready_to_present) { + BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); + if (fill_frame_compute_work(ctx, work)) { + beamform_work_queue_push_commit(ctx->beamform_work_queue); + ctx->os.wake_waiters(&ctx->os.compute_worker.sync_variable); + ctx->start_compute = 0; + } } } diff --git a/beamformer.h b/beamformer.h @@ -18,12 +18,9 @@ enum gl_vendor_ids { }; typedef struct { + v2 mouse; + v2 last_mouse; b32 executable_reloaded; - b32 pipe_data_available; - iptr pipe_handle; - - v2 mouse; - v2 last_mouse; } BeamformerInput; #define INIT_CUDA_CONFIGURATION_FN(name) void name(u32 *input_dims, u32 *decoded_dims, u16 *channel_mapping) @@ -56,17 +53,14 @@ typedef struct { #undef X } CudaLib; -#include "beamformer_parameters.h" - typedef struct { - BeamformerParameters raw; - ComputeShaderID compute_stages[16]; - u32 compute_stages_count; - b32 upload; - u32 raw_data_size; - b32 export_next_frame; - c8 export_pipe_name[1024]; -} BeamformerParametersFull; + Shader shader; + b32 updated; + i32 db_cutoff_id; + i32 threshold_id; +} FragmentShaderCtx; + +#include "beamformer_parameters.h" #define CS_UNIFORMS \ X(CS_DAS, voxel_offset) \ @@ -103,13 +97,6 @@ typedef struct { #undef X } ComputeShaderCtx; -typedef struct { - Shader shader; - b32 updated; - i32 db_cutoff_id; - i32 threshold_id; -} FragmentShaderCtx; - typedef enum { #define X(type, id, pretty, fixed_tx) DAS_ ##type = id, DAS_TYPES 
@@ -143,12 +130,14 @@ typedef struct BeamformFrame { struct BeamformFrame *next; } BeamformFrame; -typedef struct { +struct BeamformComputeFrame { BeamformFrame frame; ComputeShaderStats stats; b32 in_flight; b32 ready_to_present; -} BeamformComputeFrame; +}; + +#include "beamformer_work_queue.h" typedef struct { enum gl_vendor_ids vendor_id; @@ -161,56 +150,13 @@ typedef struct { i32 max_server_wait_time; } GLParams; -enum beamform_work { - BW_COMPUTE, - BW_LOAD_RF_DATA, - BW_RELOAD_SHADER, - BW_SAVE_FRAME, - BW_SEND_FRAME, -}; - -typedef struct { - void *beamformer_ctx; - s8 label; - s8 path; - ComputeShaderID shader; - b32 needs_header; -} ComputeShaderReloadContext; - -typedef struct { - BeamformComputeFrame *frame; - iptr file_handle; -} BeamformOutputFrameContext; - -/* NOTE: discriminated union based on type */ -typedef struct { - union { - iptr file_handle; - BeamformComputeFrame *frame; - BeamformOutputFrameContext output_frame_ctx; - ComputeShaderReloadContext *reload_shader_ctx; - }; - u32 type; -} BeamformWork; - typedef struct { - union { - u64 queue; - struct {u32 widx, ridx;}; - }; - BeamformWork work_items[1 << 6]; -} BeamformWorkQueue; - -typedef struct BeamformerCtx { GLParams gl; uv2 window_size; b32 start_compute; b32 should_exit; - /* TODO(rnp): is there a better way of tracking this? 
*/ - b32 ready_for_rf; - Arena ui_backing_store; void *ui; /* TODO(rnp): this is nasty and should be removed */ @@ -235,9 +181,17 @@ typedef struct BeamformerCtx { BeamformWorkQueue *beamform_work_queue; - BeamformerParametersFull *params; + BeamformerSharedMemory *shared_memory; } BeamformerCtx; +struct ComputeShaderReloadContext { + BeamformerCtx *beamformer_ctx; + s8 label; + s8 path; + ComputeShaderID shader; + b32 needs_header; +}; + #define LABEL_GL_OBJECT(type, id, s) {s8 _s = (s); glObjectLabel(type, id, _s.len, (c8 *)_s.data);} #define BEAMFORMER_FRAME_STEP_FN(name) void name(BeamformerCtx *ctx, Arena *arena, \ @@ -247,10 +201,4 @@ typedef BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step_fn); #define BEAMFORMER_COMPLETE_COMPUTE_FN(name) void name(iptr user_context, Arena arena, iptr gl_context) typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn); -#define BEAMFORM_WORK_QUEUE_PUSH_FN(name) BeamformWork *name(BeamformWorkQueue *q) -typedef BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push_fn); - -#define BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(name) void name(BeamformWorkQueue *q) -typedef BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit_fn); - #endif /*_BEAMFORMER_H_ */ diff --git a/beamformer_work_queue.c b/beamformer_work_queue.c @@ -0,0 +1,52 @@ +/* See LICENSE for license details. 
*/ +#include "beamformer_work_queue.h" + +static BeamformWork * +beamform_work_queue_pop(BeamformWorkQueue *q) +{ + BeamformWork *result = 0; + + static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2"); + u64 val = atomic_load(&q->queue); + u64 mask = ARRAY_COUNT(q->work_items) - 1; + u32 widx = val & mask; + u32 ridx = val >> 32 & mask; + + if (ridx != widx) + result = q->work_items + ridx; + + return result; +} + +static void +beamform_work_queue_pop_commit(BeamformWorkQueue *q) +{ + atomic_add(&q->queue, 0x100000000ULL); +} + +DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push) +{ + BeamformWork *result = 0; + + static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2"); + u64 val = atomic_load(&q->queue); + u64 mask = ARRAY_COUNT(q->work_items) - 1; + u32 widx = val & mask; + u32 ridx = val >> 32 & mask; + u32 next = (widx + 1) & mask; + + if (val & 0x80000000) + atomic_and(&q->queue, ~0x80000000); + + if (next != ridx) { + result = q->work_items + widx; + zero_struct(result); + } + + return result; +} + +DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit) +{ + atomic_add(&q->queue, 1); +} diff --git a/beamformer_work_queue.h b/beamformer_work_queue.h @@ -0,0 +1,83 @@ +/* See LICENSE for license details. 
*/ +#ifndef _BEAMFORMER_WORK_QUEUE_H_ +#define _BEAMFORMER_WORK_QUEUE_H_ + +typedef struct BeamformComputeFrame BeamformComputeFrame; +typedef struct ComputeShaderReloadContext ComputeShaderReloadContext; + +typedef enum { + BW_COMPUTE, + BW_RELOAD_SHADER, + BW_SAVE_FRAME, + BW_SEND_FRAME, + BW_UPLOAD_CHANNEL_MAPPING, + BW_UPLOAD_FOCAL_VECTORS, + BW_UPLOAD_PARAMS, + BW_UPLOAD_RF_DATA, + BW_UPLOAD_SPARSE_ELEMENTS, +} BeamformWorkType; + +typedef struct { + BeamformComputeFrame *frame; + iptr file_handle; +} BeamformOutputFrameContext; + +/* NOTE: discriminated union based on type */ +typedef struct { + union { + BeamformComputeFrame *frame; + BeamformOutputFrameContext output_frame_ctx; + ComputeShaderReloadContext *reload_shader_ctx; + void *generic; + }; + /* NOTE(rnp): mostly for __external__ processes to sync on. when passed from external + * process this should be an offset from base of shared_memory */ + iptr completion_barrier; + + BeamformWorkType type; +} BeamformWork; + +typedef struct { + union { + u64 queue; + struct {u32 widx, ridx;}; + }; + BeamformWork work_items[1 << 6]; +} BeamformWorkQueue; + +#define BEAMFORM_WORK_QUEUE_PUSH_FN(name) BeamformWork *name(BeamformWorkQueue *q) +typedef BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push_fn); + +#define BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(name) void name(BeamformWorkQueue *q) +typedef BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit_fn); + +#define BEAMFORMER_SHARED_MEMORY_SIZE (GB(2)) +#define BEAMFORMER_RF_DATA_OFF (sizeof(BeamformerSharedMemory) + 4096ULL \ + - (uintptr_t)(sizeof(BeamformerSharedMemory) & 4095ULL)) +#define BEAMFORMER_MAX_RF_DATA_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - BEAMFORMER_RF_DATA_OFF) + +typedef struct { + BeamformerParameters raw; + + ComputeShaderID compute_stages[16]; + u32 compute_stages_count; + + i32 raw_data_sync; + u32 raw_data_size; + + /* TODO(rnp): these shouldn't be needed */ + b32 upload; + b32 start_compute; + b32 export_next_frame; + + /* 
TODO(rnp): probably remove this */ + c8 export_pipe_name[256]; + + u16 channel_mapping[256]; + u16 sparse_elements[256]; + v2 transmit_angles_focal_depths[256]; + + BeamformWorkQueue external_work_queue; +} BeamformerSharedMemory; + +#endif /* _BEAMFORMER_WORK_QUEUE_H_ */ diff --git a/build.sh b/build.sh @@ -31,11 +31,12 @@ MINGW64*) main="main_w32.c" libname="beamformer.dll" ldflags="${ldflags} -lgdi32 -lwinmm -lntdll" + extra_ldflags="-lntdll" if [ ! ${NO_MATLAB} ]; then libcflags="${libcflags} -DMATLAB_CONSOLE" - extra_ldflags="-llibmat -llibmex" + extra_ldflags="${extra_ldflags} -llibmat -llibmex" fi - ${cc} ${libcflags} helpers/ogl_beamformer_lib.c -o helpers/ogl_beamformer_lib.dll \ + ${cc} ${libcflags} -O3 helpers/ogl_beamformer_lib.c -o helpers/ogl_beamformer_lib.dll \ -L'C:/Program Files/MATLAB/R2022a/extern/lib/win64/microsoft' \ ${extra_ldflags} ;; @@ -47,7 +48,7 @@ Linux*) libname="beamformer.so" cflags="${cflags} -D_DEFAULT_SOURCE" - ${cc} ${libcflags} helpers/ogl_beamformer_lib.c -o helpers/ogl_beamformer_lib.so + ${cc} ${libcflags} -O3 helpers/ogl_beamformer_lib.c -o helpers/ogl_beamformer_lib.so ;; esac diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c @@ -1,43 +1,24 @@ /* See LICENSE for license details. 
*/ -#include "ogl_beamformer_lib.h" - -typedef struct { - BeamformerParameters raw; - ComputeShaderID compute_stages[16]; - u32 compute_stages_count; - b32 upload; - u32 raw_data_size; - b32 export_next_frame; - c8 export_pipe_name[1024]; -} BeamformerParametersFull; - -typedef struct { - iptr file; - char *name; -} Pipe; - -typedef struct { size len; u8 *data; } s8; -#define s8(s) (s8){.len = ARRAY_COUNT(s) - 1, .data = (u8 *)s} - -#define ARRAY_COUNT(a) (sizeof(a) / sizeof(*a)) - -#define U32_MAX (0xFFFFFFFFUL) - -#define INVALID_FILE (-1) +#include "../util.h" +#include "../beamformer_parameters.h" +#include "../beamformer_work_queue.c" #define PIPE_RETRY_PERIOD_MS (100ULL) -static volatile BeamformerParametersFull *g_bp; -static Pipe g_pipe = {.file = INVALID_FILE}; +static BeamformerSharedMemory *g_bp; -#if defined(__unix__) +#if defined(__linux__) #include <fcntl.h> +#include <linux/futex.h> #include <poll.h> #include <sys/mman.h> #include <sys/stat.h> +#include <sys/syscall.h> #include <time.h> #include <unistd.h> +i64 syscall(i64, ...); + #define OS_EXPORT_PIPE_NAME "/tmp/beamformer_output_pipe" #elif defined(_WIN32) @@ -67,6 +48,7 @@ W32(i32) GetLastError(void); W32(iptr) MapViewOfFile(iptr, u32, u32, u32, u64); W32(iptr) OpenFileMappingA(u32, b32, c8 *); W32(b32) ReadFile(iptr, u8 *, i32, i32 *, void *); +W32(i32) RtlWaitOnAddress(void *, void *, uz, void *); W32(void) Sleep(u32); W32(void) UnmapViewOfFile(iptr); W32(b32) WriteFile(iptr, u8 *, i32, i32 *, void *); @@ -88,11 +70,17 @@ void mexWarnMsgIdAndTxt(const c8*, c8*, ...); #define warning_msg(...) 
#endif -#if defined(__unix__) -static Pipe -os_open_named_pipe(char *name) +#if defined(__linux__) +static void +os_wait_on_value(i32 *value, i32 current, u32 timeout_ms) { - return (Pipe){.file = open(name, O_WRONLY), .name = name}; + struct timespec *timeout = 0, timeout_value; + if (timeout_ms != U32_MAX) { + timeout_value.tv_sec = timeout_ms / 1000; + timeout_value.tv_nsec = (timeout_ms % 1000) * 1000000; + timeout = &timeout_value; + } + syscall(SYS_futex, value, FUTEX_WAIT, current, timeout, 0, 0); } static Pipe @@ -116,12 +104,12 @@ os_close_pipe(iptr *file, char *name) } static b32 -os_wait_read_pipe(Pipe p, void *buf, size read_size, u32 timeout_ms) +os_wait_read_pipe(Pipe p, void *buf, iz read_size, u32 timeout_ms) { struct pollfd pfd = {.fd = p.file, .events = POLLIN}; - size total_read = 0; + iz total_read = 0; if (poll(&pfd, 1, timeout_ms) > 0) { - size r; + iz r; do { r = read(p.file, (u8 *)buf + total_read, read_size - total_read); if (r > 0) total_read += r; @@ -130,47 +118,32 @@ os_wait_read_pipe(Pipe p, void *buf, size read_size, u32 timeout_ms) return total_read == read_size; } -static size -os_write(iptr f, void *data, size data_size) -{ - size written = 0, w = 0; - do { - w = write(f, (u8 *)data + written, data_size - written); - if (w != -1) written += w; - } while (written != data_size && w != 0); - return written; -} - -static BeamformerParametersFull * +static BeamformerSharedMemory * os_open_shared_memory_area(char *name) { + BeamformerSharedMemory *result = 0; i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR); - if (fd == -1) - return NULL; - - BeamformerParametersFull *new; - new = mmap(NULL, sizeof(*new), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - close(fd); - - if (new == MAP_FAILED) - return NULL; - - return new; -} - -static void -os_release_shared_memory(iptr memory, u64 size) -{ - munmap((void *)memory, size); + if (fd > 0) { + void *new = mmap(0, BEAMFORMER_SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (new != 
MAP_FAILED) + result = new; + close(fd); + } + return result; } #elif defined(_WIN32) -static Pipe -os_open_named_pipe(char *name) +static void +os_wait_on_value(i32 *value, i32 current, u32 timeout_ms) { - iptr pipe = CreateFileA(name, GENERIC_WRITE, 0, 0, OPEN_EXISTING, 0, 0); - return (Pipe){.file = pipe, .name = name}; + i64 *timeout = 0, timeout_value; + if (timeout_ms != U32_MAX) { + /* TODO(rnp): not sure about this one, but this is how wine converts the ms */ + timeout_value = -(i64)timeout_ms * 10000; + timeout = &timeout_value; + } + RtlWaitOnAddress(value, &current, sizeof(*value), timeout); } static Pipe @@ -195,9 +168,9 @@ os_close_pipe(iptr *file, char *name) } static b32 -os_wait_read_pipe(Pipe p, void *buf, size read_size, u32 timeout_ms) +os_wait_read_pipe(Pipe p, void *buf, iz read_size, u32 timeout_ms) { - size elapsed_ms = 0, total_read = 0; + iz elapsed_ms = 0, total_read = 0; while (elapsed_ms <= timeout_ms && read_size != total_read) { u8 data; i32 read; @@ -221,37 +194,18 @@ os_wait_read_pipe(Pipe p, void *buf, size read_size, u32 timeout_ms) return total_read == read_size; } -static size -os_write(iptr f, void *data, size data_size) -{ - i32 written = 0; - b32 result = WriteFile(f, (u8 *)data, data_size, &written, 0); - if (!result) { - i32 error = GetLastError(); - warning_msg("os_write(data_size = %td): error: %d", data_size, error); - } - return written; -} - -static BeamformerParametersFull * +static BeamformerSharedMemory * os_open_shared_memory_area(char *name) { + BeamformerSharedMemory *result = 0; iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name); - if (h == INVALID_FILE) - return 0; - - BeamformerParametersFull *new; - iptr view = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, sizeof(*new)); - new = (BeamformerParametersFull *)view; - CloseHandle(h); - - return new; -} + if (h != INVALID_FILE) { + iptr view = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, BEAMFORMER_SHARED_MEMORY_SIZE); + result = (BeamformerSharedMemory *)view; + 
CloseHandle(h); + } -static void -os_release_shared_memory(iptr memory, u64 size) -{ - UnmapViewOfFile(memory); + return result; } #endif @@ -304,9 +258,7 @@ set_beamformer_parameters(char *shm_name, BeamformerParameters *new_bp) if (!check_shared_memory(shm_name)) return 0; - u8 *src = (u8 *)new_bp, *dest = (u8 *)&g_bp->raw; - for (size i = 0; i < sizeof(BeamformerParameters); i++) - dest[i] = src[i]; + mem_copy(&g_bp->raw, new_bp, sizeof(BeamformerParameters)); g_bp->upload = 1; return 1; @@ -315,37 +267,29 @@ set_beamformer_parameters(char *shm_name, BeamformerParameters *new_bp) b32 send_data(char *pipe_name, char *shm_name, void *data, u32 data_size) { - b32 result = g_pipe.file != INVALID_FILE; - if (!result) { - g_pipe = os_open_named_pipe(pipe_name); - result = g_pipe.file != INVALID_FILE; - if (!result) - error_msg("failed to open pipe"); - } - result &= check_shared_memory(shm_name); - + b32 result = check_shared_memory(shm_name) && data_size <= BEAMFORMER_MAX_RF_DATA_SIZE; if (result) { - g_bp->raw_data_size = data_size; - g_bp->upload = 1; - - size written = os_write(g_pipe.file, data, data_size); - result = written == data_size; - if (!result) { - warning_msg("failed to write data to pipe: retrying..."); - os_close_pipe(&g_pipe.file, 0); - os_release_shared_memory((iptr)g_bp, sizeof(*g_bp)); - g_bp = 0; - g_pipe = os_open_named_pipe(pipe_name); - result = g_pipe.file != INVALID_FILE && check_shared_memory(shm_name); - if (result) - written = os_write(g_pipe.file, data, data_size); - result = written == data_size; - if (!result) - warning_msg("failed again, wrote %ld/%u\ngiving up", - written, data_size); + BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue); + if (work) { + i32 current = atomic_load(&g_bp->raw_data_sync); + if (current) { + atomic_inc(&g_bp->raw_data_sync, -current); + work->type = BW_UPLOAD_RF_DATA; + work->completion_barrier = offsetof(BeamformerSharedMemory, raw_data_sync); + mem_copy((u8 *)g_bp + 
BEAMFORMER_RF_DATA_OFF, data, data_size); + g_bp->raw_data_size = data_size; + + beamform_work_queue_push_commit(&g_bp->external_work_queue); + + g_bp->start_compute = 1; + /* TODO(rnp): set timeout on acquiring the lock instead of this */ + os_wait_on_value(&g_bp->raw_data_sync, 0, -1); + } else { + warning_msg("failed to acquire raw data lock"); + result = 0; + } } } - return result; } @@ -383,7 +327,7 @@ beamform_data_synchronized(char *pipe_name, char *shm_name, void *data, u32 data b32 result = send_data(pipe_name, shm_name, data, data_size); if (result) { - size output_size = output_points.x * output_points.y * output_points.z * sizeof(f32) * 2; + iz output_size = output_points.x * output_points.y * output_points.z * sizeof(f32) * 2; result = os_wait_read_pipe(export_pipe, out_data, output_size, timeout_ms); if (!result) warning_msg("failed to read full export data from pipe"); @@ -391,7 +335,6 @@ beamform_data_synchronized(char *pipe_name, char *shm_name, void *data, u32 data os_disconnect_pipe(export_pipe); os_close_pipe(&export_pipe.file, export_pipe.name); - os_close_pipe(&g_pipe.file, 0); return result; } diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h @@ -2,18 +2,17 @@ #include <stddef.h> #include <stdint.h> -typedef char c8; -typedef uint8_t u8; typedef int16_t i16; typedef uint16_t u16; typedef int32_t i32; typedef uint32_t u32; typedef uint32_t b32; -typedef uint64_t u64; typedef float f32; -typedef double f64; -typedef ptrdiff_t size; -typedef ptrdiff_t iptr; +typedef struct { u32 x, y; } uv2; +typedef struct { u32 x, y, z, w; } uv4; +typedef struct { f32 x, y, z, w; } v4; + +#include "../beamformer_parameters.h" #if defined(_WIN32) #define LIB_FN __declspec(dllexport) @@ -21,14 +20,6 @@ typedef ptrdiff_t iptr; #define LIB_FN #endif -typedef struct { f32 x, y; } v2; -typedef struct { f32 x, y, z, w; } v4; -typedef struct { u32 x, y; } uv2; -typedef struct { u32 x, y, z; } uv3; -typedef struct { u32 x, y, z, w; } uv4; - 
-#include "../beamformer_parameters.h" - LIB_FN b32 set_beamformer_parameters(char *shm_name, BeamformerParameters *); LIB_FN b32 set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count); diff --git a/main_linux.c b/main_linux.c @@ -15,8 +15,7 @@ #define OS_RENDERDOC_SONAME "librenderdoc.so" -#define OS_PIPE_NAME "/tmp/beamformer_data_fifo" -#define OS_SMEM_NAME "/ogl_beamformer_parameters" +#define OS_SMEM_NAME "/ogl_beamformer_shared_memory" #define OS_PATH_SEPERATOR "/" @@ -71,10 +70,6 @@ main(void) ctx.ui_backing_store = sub_arena(&temp_memory, MB(2), KB(4)); ctx.os.compute_worker.arena = sub_arena(&temp_memory, MB(2), KB(4)); - Pipe data_pipe = os_open_named_pipe(OS_PIPE_NAME); - input.pipe_handle = data_pipe.file; - ASSERT(data_pipe.file != INVALID_FILE); - #define X(name) ctx.os.name = os_ ## name; OS_FNS #undef X @@ -87,20 +82,17 @@ main(void) setup_beamformer(&ctx, &temp_memory); os_wake_waiters(&ctx.os.compute_worker.sync_variable); - struct pollfd fds[2] = {{0}, {0}}; + struct pollfd fds[1] = {{0}}; fds[0].fd = ctx.os.file_watch_context.handle; fds[0].events = POLLIN; - fds[1].fd = data_pipe.file; - fds[1].events = POLLIN; while (!ctx.should_exit) { poll(fds, 2, 0); if (fds[0].revents & POLLIN) dispatch_file_watch_events(&ctx.os, temp_memory); - input.pipe_data_available = !!(fds[1].revents & POLLIN); - input.last_mouse = input.mouse; - input.mouse.rl = GetMousePosition(); + input.last_mouse = input.mouse; + input.mouse.rl = GetMousePosition(); beamformer_frame_step(&ctx, &temp_memory, &input); @@ -110,8 +102,4 @@ main(void) /* NOTE: make sure this will get cleaned up after external * programs release their references */ shm_unlink(OS_SMEM_NAME); - - /* NOTE: garbage code needed for Linux */ - close(data_pipe.file); - unlink(data_pipe.name); } diff --git a/main_w32.c b/main_w32.c @@ -29,7 +29,6 @@ typedef struct { #define OS_RENDERDOC_SONAME "renderdoc.dll" -#define OS_PIPE_NAME "\\\\.\\pipe\\beamformer_data_fifo" #define OS_SMEM_NAME 
"Local\\ogl_beamformer_parameters" #define OS_PATH_SEPERATOR "\\" @@ -103,36 +102,6 @@ clear_io_queue(OS *os, BeamformerInput *input, Arena arena) } } -static b32 -poll_pipe(Pipe *p, Stream *e, OS *os) -{ - u8 data; - i32 total_read = 0; - b32 result = ReadFile(p->file, &data, 0, &total_read, 0); - if (!result) { - i32 error = GetLastError(); - /* NOTE: These errors mean nothing's been sent yet, otherwise pipe is busted - * and needs to be recreated. */ - if (error != ERROR_NO_DATA && - error != ERROR_PIPE_LISTENING && - error != ERROR_PIPE_NOT_CONNECTED) - { - DisconnectNamedPipe(p->file); - CloseHandle(p->file); - *p = os_open_named_pipe(p->name); - - if (p->file == INVALID_FILE) { - stream_append_s8(e, s8("poll_pipe: failed to reopen pipe: error: ")); - stream_append_i64(e, GetLastError()); - stream_append_byte(e, '\n'); - os->write_file(os->stderr, stream_to_s8(e)); - stream_reset(e, 0); - } - } - } - return result; -} - int main(void) { @@ -144,10 +113,6 @@ main(void) ctx.ui_backing_store = sub_arena(&temp_memory, MB(2), KB(4)); ctx.os.compute_worker.arena = sub_arena(&temp_memory, MB(2), KB(4)); - Pipe data_pipe = os_open_named_pipe(OS_PIPE_NAME); - input.pipe_handle = data_pipe.file; - ASSERT(data_pipe.file != INVALID_FILE); - #define X(name) ctx.os.name = os_ ## name; OS_FNS #undef X @@ -169,8 +134,6 @@ main(void) input.last_mouse = input.mouse; input.mouse.rl = GetMousePosition(); - input.pipe_data_available = poll_pipe(&data_pipe, &ctx.error_stream, &ctx.os); - beamformer_frame_step(&ctx, &temp_memory, &input); input.executable_reloaded = 0; diff --git a/os_linux.c b/os_linux.c @@ -119,13 +119,6 @@ os_file_exists(char *path) return result; } -static Pipe -os_open_named_pipe(char *name) -{ - mkfifo(name, 0660); - return (Pipe){.file = open(name, O_RDONLY|O_NONBLOCK), .name = name}; -} - static OS_READ_FILE_FN(os_read_file) { iz r = 0, total_read = 0; @@ -140,22 +133,17 @@ static OS_READ_FILE_FN(os_read_file) static void * os_open_shared_memory_area(char 
*name, iz cap) { + void *result = 0; i32 fd = shm_open(name, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); - if (fd == -1) - return NULL; - - if (ftruncate(fd, cap) == -1) { + if (fd > 0) { + if (ftruncate(fd, cap) != -1) { + void *new = mmap(NULL, cap, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (new != MAP_FAILED) + result = new; + } close(fd); - return NULL; } - - void *new = mmap(NULL, cap, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - close(fd); - - if (new == MAP_FAILED) - return NULL; - - return new; + return result; } /* NOTE: complete garbage because there is no standarized copyfile() in POSix */ diff --git a/os_win32.c b/os_win32.c @@ -12,12 +12,6 @@ #define GENERIC_WRITE 0x40000000 #define GENERIC_READ 0x80000000 -#define PIPE_WAIT 0x00 -#define PIPE_NOWAIT 0x01 - -#define PIPE_TYPE_BYTE 0x00 -#define PIPE_ACCESS_INBOUND 0x01 - #define FILE_SHARE_READ 0x00000001 #define FILE_MAP_ALL_ACCESS 0x000F001F #define FILE_FLAG_BACKUP_SEMANTICS 0x02000000 @@ -30,10 +24,6 @@ #define CREATE_ALWAYS 2 #define OPEN_EXISTING 3 -#define ERROR_NO_DATA 232L -#define ERROR_PIPE_NOT_CONNECTED 233L -#define ERROR_PIPE_LISTENING 536L - #define THREAD_SET_LIMITED_INFORMATION 0x0400 typedef struct { @@ -89,10 +79,8 @@ W32(b32) CopyFileA(c8 *, c8 *, b32); W32(iptr) CreateFileA(c8 *, u32, u32, void *, u32, u32, void *); W32(iptr) CreateFileMappingA(iptr, void *, u32, u32, u32, c8 *); W32(iptr) CreateIoCompletionPort(iptr, iptr, uptr, u32); -W32(iptr) CreateNamedPipeA(c8 *, u32, u32, u32, u32, u32, u32, void *); W32(iptr) CreateThread(iptr, uz, iptr, iptr, u32, u32 *); W32(b32) DeleteFileA(c8 *); -W32(b32) DisconnectNamedPipe(iptr); W32(void) ExitProcess(i32); W32(b32) FreeLibrary(void *); W32(i32) GetFileAttributesA(c8 *); @@ -233,22 +221,14 @@ os_file_exists(char *path) return result; } -static Pipe -os_open_named_pipe(char *name) -{ - iptr h = CreateNamedPipeA(name, PIPE_ACCESS_INBOUND, PIPE_TYPE_BYTE|PIPE_NOWAIT, 1, - 0, MB(1), 0, 0); - return (Pipe){.file = h, .name = name}; -} - static 
void * os_open_shared_memory_area(char *name, iz cap) { + void *result = 0; iptr h = CreateFileMappingA(-1, 0, PAGE_READWRITE, 0, cap, name); - if (h == INVALID_FILE) - return NULL; - - return MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, cap); + if (h != INVALID_FILE) + result = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, cap); + return result; } static void * diff --git a/static.c b/static.c @@ -302,14 +302,15 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory) ctx->beamform_work_queue = push_struct(memory, BeamformWorkQueue); - ctx->params = os_open_shared_memory_area(OS_SMEM_NAME, sizeof(*ctx->params)); - /* TODO: properly handle this? */ - ASSERT(ctx->params); + ctx->shared_memory = os_open_shared_memory_area(OS_SMEM_NAME, BEAMFORMER_SHARED_MEMORY_SIZE); + if (!ctx->shared_memory) + os_fatal(s8("Get more ram lol\n")); + ctx->shared_memory->raw_data_sync = 1; /* NOTE: default compute shader pipeline */ - ctx->params->compute_stages[0] = CS_DECODE; - ctx->params->compute_stages[1] = CS_DAS; - ctx->params->compute_stages_count = 2; + ctx->shared_memory->compute_stages[0] = CS_DECODE; + ctx->shared_memory->compute_stages[1] = CS_DAS; + ctx->shared_memory->compute_stages_count = 2; if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA && load_cuda_lib(&ctx->os, s8(OS_CUDA_LIB_NAME), (iptr)&ctx->cuda_lib, *memory)) @@ -353,6 +354,4 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory) s8 render = s8(static_path_join("shaders", "render.glsl")); reload_render_shader(&ctx->os, render, (iptr)&ctx->fsctx, *memory); os_add_file_watch(&ctx->os, memory, render, reload_render_shader, (iptr)&ctx->fsctx); - - ctx->ready_for_rf = 1; } diff --git a/ui.c b/ui.c @@ -1498,10 +1498,10 @@ draw_compute_stats_view(BeamformerCtx *ctx, Arena arena, ComputeShaderStats *sta v2 at = r.pos; Stream buf = stream_alloc(&arena, 64); f32 compute_time_sum = 0; - u32 stages = ctx->params->compute_stages_count; + u32 stages = ctx->shared_memory->compute_stages_count; TextSpec text_spec = {.font = &ui->font, 
.colour = NORMALIZED_FG_COLOUR, .flags = TF_LIMITED}; for (u32 i = 0; i < stages; i++) { - u32 index = ctx->params->compute_stages[i]; + u32 index = ctx->shared_memory->compute_stages[i]; text_spec.limits.size.x = r.size.w; draw_text(labels[index], at, &text_spec); text_spec.limits.size.x -= LISTING_ITEM_PAD + max_label_width; @@ -2380,7 +2380,7 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw /* TODO(rnp): there should be a better way of detecting this */ if (ctx->ui_read_params) { - mem_copy(&ui->params, &ctx->params->raw.output_min_coordinate, sizeof(ui->params)); + mem_copy(&ui->params, &ctx->shared_memory->raw.output_min_coordinate, sizeof(ui->params)); ui->flush_params = 0; ctx->ui_read_params = 0; } @@ -2392,9 +2392,10 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw if (ui->flush_params) { validate_ui_parameters(&ui->params); if (!ctx->csctx.processing_compute) { - mem_copy(&ctx->params->raw.output_min_coordinate, &ui->params, sizeof(ui->params)); + mem_copy(&ctx->shared_memory->raw.output_min_coordinate, &ui->params, + sizeof(ui->params)); ui->flush_params = 0; - ctx->params->upload = 1; + ctx->shared_memory->upload = 1; ctx->start_compute = 1; } } diff --git a/util.c b/util.c @@ -26,7 +26,6 @@ mem_clear(void *p_, u8 c, iz size) static void mem_copy(void *restrict dest, void *restrict src, uz n) { - ASSERT(n >= 0); u8 *s = src, *d = dest; for (; n; n--) *d++ = *s++; } diff --git a/util.h b/util.h @@ -71,9 +71,9 @@ if ((v)->prev) (v)->prev->next = (v)->next; \ } while (0) -#define KB(a) ((a) << 10ULL) -#define MB(a) ((a) << 20ULL) -#define GB(a) ((a) << 30ULL) +#define KB(a) ((u64)(a) << 10ULL) +#define MB(a) ((u64)(a) << 20ULL) +#define GB(a) ((u64)(a) << 30ULL) #define I32_MAX (0x7FFFFFFFL) #define U32_MAX (0xFFFFFFFFUL)