ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: 9f1baf0d742a1a6826f928322f73d0b6e83d0f40
Parent: f10808abee641e1117c6d3ec977e275edf7a24bc
Author: Randy Palamar
Date:   Sun,  4 May 2025 14:20:05 -0600

core/lib: API breakage: use base c types for parameters, share os layer

for certain terrible programming languages with broken FFIs it is
better if we just use arrays of variables instead of nested
structures

while we are at it, clean up duplicate OS code and make the shared
memory name an implicit detail that doesn't need to be passed
around everywhere.

Diffstat:
Mbeamformer.c | 48++++++++++++++++++++++++------------------------
Mbeamformer_parameters.h | 68++++++++++++++++++++++++++++++++++----------------------------------
Mbeamformer_work_queue.h | 3---
Mhelpers/ogl_beamformer_lib.c | 197+++++++++++++++++++++++++++----------------------------------------------------
Mhelpers/ogl_beamformer_lib.h | 42+++++++++++++++---------------------------
Mmain_linux.c | 5++---
Mmain_w32.c | 3+--
Mos_linux.c | 19++++++++++---------
Mos_win32.c | 8++++++--
Mstatic.c | 2+-
Mui.c | 34+++++++++++++++++-----------------
Mutil.c | 26++++++++++++++++++++++++--
Mutil.h | 2++
13 files changed, 204 insertions(+), 253 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -35,17 +35,17 @@ typedef struct { u32 needed_frames; } ComputeFrameIterator; -static uv3 -make_valid_test_dim(uv3 in) +function uv3 +make_valid_test_dim(u32 in[3]) { uv3 result; - result.x = MAX(in.x, 1); - result.y = MAX(in.y, 1); - result.z = MAX(in.z, 1); + result.E[0] = MAX(in[0], 1); + result.E[1] = MAX(in[1], 1); + result.E[2] = MAX(in[2], 1); return result; } -static ComputeFrameIterator +function ComputeFrameIterator compute_frame_iterator(BeamformerCtx *ctx, u32 start_index, u32 needed_frames) { start_index = start_index % ARRAY_COUNT(ctx->beamform_frames); @@ -112,8 +112,7 @@ alloc_shader_storage(BeamformerCtx *ctx, u32 rf_raw_size, Arena a) ComputeShaderCtx *cs = &ctx->csctx; BeamformerParameters *bp = &ctx->shared_memory->parameters; - uv4 dec_data_dim = bp->dec_data_dim; - cs->dec_data_dim = dec_data_dim; + cs->dec_data_dim = uv4_from_u32_array(bp->dec_data_dim); cs->rf_raw_size = rf_raw_size; glDeleteBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos); @@ -139,21 +138,22 @@ alloc_shader_storage(BeamformerCtx *ctx, u32 rf_raw_size, Arena a) /* NOTE(rnp): these are stubs when CUDA isn't supported */ ctx->cuda_lib.register_cuda_buffers(cs->rf_data_ssbos, ARRAY_COUNT(cs->rf_data_ssbos), cs->raw_data_ssbo); - ctx->cuda_lib.init_cuda_configuration(bp->rf_raw_dim.E, bp->dec_data_dim.E, + ctx->cuda_lib.init_cuda_configuration(bp->rf_raw_dim, bp->dec_data_dim, ctx->shared_memory->channel_mapping); - i32 *hadamard = make_hadamard_transpose(&a, dec_data_dim.z); + u32 order = cs->dec_data_dim.z; + i32 *hadamard = make_hadamard_transpose(&a, order); if (hadamard) { glDeleteTextures(1, &cs->hadamard_texture); glCreateTextures(GL_TEXTURE_2D, 1, &cs->hadamard_texture); - glTextureStorage2D(cs->hadamard_texture, 1, GL_R8I, dec_data_dim.z, dec_data_dim.z); - glTextureSubImage2D(cs->hadamard_texture, 0, 0, 0, dec_data_dim.z, dec_data_dim.z, - GL_RED_INTEGER, GL_INT, hadamard); + glTextureStorage2D(cs->hadamard_texture, 
1, GL_R8I, order, order); + glTextureSubImage2D(cs->hadamard_texture, 0, 0, 0, order, order, GL_RED_INTEGER, + GL_INT, hadamard); LABEL_GL_OBJECT(GL_TEXTURE, cs->hadamard_texture, s8("Hadamard_Matrix")); } } -static b32 +function b32 fill_frame_compute_work(BeamformerCtx *ctx, BeamformWork *work, ImagePlaneTag plane) { b32 result = 0; @@ -367,7 +367,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame, ASSERT(frame >= ctx->beamform_frames); ASSERT(frame < ctx->beamform_frames + ARRAY_COUNT(ctx->beamform_frames)); u32 base_index = (u32)(frame - ctx->beamform_frames); - u32 to_average = ctx->shared_memory->parameters.output_points.w; + u32 to_average = ctx->shared_memory->parameters.output_points[3]; u32 frame_count = 0; u32 *in_textures = push_array(&arena, u32, MAX_BEAMFORMED_SAVED_FRAMES); ComputeFrameIterator cfi = compute_frame_iterator(ctx, 1 + base_index - to_average, @@ -536,7 +536,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co } break; case BU_KIND_RF_DATA: { if (cs->rf_raw_size != uc->size || - !uv4_equal(cs->dec_data_dim, bp->dec_data_dim)) + !uv4_equal(cs->dec_data_dim, uv4_from_u32_array(bp->dec_data_dim))) { alloc_shader_storage(ctx, uc->size, arena); } @@ -560,12 +560,12 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co start_renderdoc_capture(gl_context); BeamformComputeFrame *frame = work->frame; - uv3 try_dim = make_valid_test_dim(bp->output_points.xyz); + uv3 try_dim = make_valid_test_dim(bp->output_points); if (!uv3_equal(try_dim, frame->frame.dim)) alloc_beamform_frame(&ctx->gl, &frame->frame, &frame->stats, try_dim, s8("Beamformed_Data"), arena); - if (bp->output_points.w > 1) { + if (bp->output_points[3] > 1) { if (!uv3_equal(try_dim, ctx->averaged_frames[0].frame.dim)) { alloc_beamform_frame(&ctx->gl, &ctx->averaged_frames[0].frame, &ctx->averaged_frames[0].stats, @@ -577,10 +577,10 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, 
Arena arena, iptr gl_co } frame->in_flight = 1; - frame->frame.min_coordinate = bp->output_min_coordinate; - frame->frame.max_coordinate = bp->output_max_coordinate; + frame->frame.min_coordinate = v4_from_f32_array(bp->output_min_coordinate); + frame->frame.max_coordinate = v4_from_f32_array(bp->output_max_coordinate); frame->frame.das_shader_id = bp->das_shader_id; - frame->frame.compound_count = bp->dec_data_dim.z; + frame->frame.compound_count = bp->dec_data_dim[2]; b32 did_sum_shader = 0; u32 stage_count = sm->compute_stages_count; @@ -703,10 +703,10 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) BeamformWork *export = beamform_work_queue_push(ctx->beamform_work_queue); if (export) { /* TODO: we don't really want the beamformer opening/closing files */ - iptr f = ctx->os.open_for_write(ctx->shared_memory->export_pipe_name); + iptr f = ctx->os.open_for_write(ctx->os.export_pipe_name); export->type = BW_SAVE_FRAME; export->output_frame_ctx.file_handle = f; - if (bp->output_points.w > 1) { + if (bp->output_points[3] > 1) { u32 a_index = !(ctx->averaged_frame_index % ARRAY_COUNT(ctx->averaged_frames)); BeamformComputeFrame *aframe = ctx->averaged_frames + a_index; @@ -750,7 +750,7 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) } BeamformComputeFrame *frame_to_draw; - if (bp->output_points.w > 1) { + if (bp->output_points[3] > 1) { u32 a_index = !(ctx->averaged_frame_index % ARRAY_COUNT(ctx->averaged_frames)); frame_to_draw = ctx->averaged_frames + a_index; } else { diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -70,44 +70,44 @@ typedef enum { /* TODO(rnp): actually use a substruct but generate a header compatible with MATLAB */ /* X(name, type, size, gltype, glsize, comment) */ #define BEAMFORMER_UI_PARAMS \ - X(output_min_coordinate, v4, , vec4, , "/* [m] Back-Top-Left corner of output region */") \ - X(output_max_coordinate, v4, , vec4, , "/* [m] Front-Bottom-Right corner of output region */") \ - 
X(output_points, uv4, , uvec4, , "/* Width * Height * Depth * (Frame Average Count) */") \ - X(sampling_frequency, f32, , float, , "/* [Hz] */") \ - X(center_frequency, f32, , float, , "/* [Hz] */") \ - X(speed_of_sound, f32, , float, , "/* [m/s] */") \ - X(off_axis_pos, f32, , float, , "/* [m] Position on screen normal to beamform in TPW/VLSHERCULES */") \ - X(beamform_plane, i32, , int, , "/* Plane to Beamform in TPW/VLS/HERCULES */") \ - X(f_number, f32, , float, , "/* F# (set to 0 to disable) */") \ - X(interpolate, b32, , bool, , "/* Perform Cubic Interpolation of RF Samples */") + X(output_min_coordinate, float, [4], vec4, , "/* [m] Back-Top-Left corner of output region */") \ + X(output_max_coordinate, float, [4], vec4, , "/* [m] Front-Bottom-Right corner of output region */") \ + X(output_points, uint32_t, [4], uvec4, , "/* Width * Height * Depth * (Frame Average Count) */") \ + X(sampling_frequency, float, , float, , "/* [Hz] */") \ + X(center_frequency, float, , float, , "/* [Hz] */") \ + X(speed_of_sound, float, , float, , "/* [m/s] */") \ + X(off_axis_pos, float, , float, , "/* [m] Position on screen normal to beamform in TPW/VLSHERCULES */") \ + X(beamform_plane, int32_t, , int, , "/* Plane to Beamform in TPW/VLS/HERCULES */") \ + X(f_number, float, , float, , "/* F# (set to 0 to disable) */") \ + X(interpolate, uint32_t, , bool, , "/* Perform Cubic Interpolation of RF Samples */") #define BEAMFORMER_PARAMS_HEAD_V0 \ - X(channel_mapping, u16, [256], uvec4, [32], "/* Transducer Channel to Verasonics Channel */") \ - X(uforces_channels, u16, [256], uvec4, [32], "/* Channels used for virtual UFORCES elements */") \ - X(focal_depths, f32, [256], vec4, [64], "/* [m] Focal Depths for each transmit of a RCA imaging scheme*/") \ - X(transmit_angles, f32, [256], vec4, [64], "/* [degrees] Transmit Angles for each transmit of a RCA imaging scheme*/") \ - X(xdc_transform, f32, [16] , mat4, , "/* IMPORTANT: column major order */") \ - X(dec_data_dim, uv4, , uvec4, 
, "/* Samples * Channels * Acquisitions; last element ignored */") \ - X(xdc_element_pitch, f32, [2] , vec2, , "/* [m] Transducer Element Pitch {row, col} */") \ - X(rf_raw_dim, uv2, , uvec2, , "/* Raw Data Dimensions */") \ - X(transmit_mode, i32, , int, , "/* Method/Orientation of Transmit */") \ - X(decode, u32, , uint, , "/* Decode or just reshape data */") \ - X(das_shader_id, u32, , uint, , "") \ - X(time_offset, f32, , float, , "/* pulse length correction time [s] */") + X(channel_mapping, uint16_t, [256], uvec4, [32], "/* Transducer Channel to Verasonics Channel */") \ + X(uforces_channels, uint16_t, [256], uvec4, [32], "/* Channels used for virtual UFORCES elements */") \ + X(focal_depths, float, [256], vec4, [64], "/* [m] Focal Depths for each transmit of a RCA imaging scheme*/") \ + X(transmit_angles, float, [256], vec4, [64], "/* [degrees] Transmit Angles for each transmit of a RCA imaging scheme*/") \ + X(xdc_transform, float, [16] , mat4, , "/* IMPORTANT: column major order */") \ + X(dec_data_dim, uint32_t, [4] , uvec4, , "/* Samples * Channels * Acquisitions; last element ignored */") \ + X(xdc_element_pitch, float, [2] , vec2, , "/* [m] Transducer Element Pitch {row, col} */") \ + X(rf_raw_dim, uint32_t, [2] , uvec2, , "/* Raw Data Dimensions */") \ + X(transmit_mode, int32_t, , int, , "/* Method/Orientation of Transmit */") \ + X(decode, uint32_t, , uint, , "/* Decode or just reshape data */") \ + X(das_shader_id, uint32_t, , uint, , "") \ + X(time_offset, float, , float, , "/* pulse length correction time [s] */") #define BEAMFORMER_PARAMS_HEAD \ - X(xdc_transform, f32, [16] , mat4, , "/* IMPORTANT: column major order */") \ - X(dec_data_dim, uv4, , uvec4, , "/* Samples * Channels * Acquisitions; last element ignored */") \ - X(xdc_element_pitch, f32, [2] , vec2, , "/* [m] Transducer Element Pitch {row, col} */") \ - X(rf_raw_dim, uv2, , uvec2, , "/* Raw Data Dimensions */") \ - X(transmit_mode, i32, , int, , "/* Method/Orientation of Transmit 
*/") \ - X(decode, u32, , uint, , "/* Decode or just reshape data */") \ - X(das_shader_id, u32, , uint, , "") \ - X(time_offset, f32, , float, , "/* pulse length correction time [s] */") + X(xdc_transform, float, [16], mat4, , "/* IMPORTANT: column major order */") \ + X(dec_data_dim, uint32_t, [4] , uvec4, , "/* Samples * Channels * Acquisitions; last element ignored */") \ + X(xdc_element_pitch, float, [2] , vec2, , "/* [m] Transducer Element Pitch {row, col} */") \ + X(rf_raw_dim, uint32_t, [2] , uvec2, , "/* Raw Data Dimensions */") \ + X(transmit_mode, int32_t, , int, , "/* Method/Orientation of Transmit */") \ + X(decode, uint32_t, , uint, , "/* Decode or just reshape data */") \ + X(das_shader_id, uint32_t, , uint, , "") \ + X(time_offset, float, , float, , "/* pulse length correction time [s] */") #define BEAMFORMER_PARAMS_TAIL \ - X(readi_group_id, u32, , uint, , "/* Which readi group this data is from */") \ - X(readi_group_size, u32, , uint, , "/* Size of readi transmit group */") + X(readi_group_id, uint32_t, , uint, , "/* Which readi group this data is from */") \ + X(readi_group_size, uint32_t, , uint, , "/* Size of readi transmit group */") #define X(name, type, size, gltype, glsize, comment) type name size; typedef struct { BEAMFORMER_UI_PARAMS } BeamformerUIParameters; @@ -118,7 +118,7 @@ typedef struct { BEAMFORMER_PARAMS_HEAD_V0 BEAMFORMER_UI_PARAMS BEAMFORMER_PARAMS_TAIL - f32 _pad[3]; + float _pad[3]; } BeamformerParametersV0; /* NOTE: This struct follows the OpenGL std140 layout. 
DO NOT modify unless you have @@ -127,7 +127,7 @@ typedef struct { BEAMFORMER_PARAMS_HEAD BEAMFORMER_UI_PARAMS BEAMFORMER_PARAMS_TAIL - f32 _pad[3]; + float _pad[3]; } BeamformerParameters; #undef X diff --git a/beamformer_work_queue.h b/beamformer_work_queue.h @@ -101,9 +101,6 @@ typedef struct { /* TODO(rnp): these shouldn't be needed */ b32 export_next_frame; - /* TODO(rnp): probably remove this */ - c8 export_pipe_name[256]; - BeamformWorkQueue external_work_queue; } BeamformerSharedMemory; diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c @@ -1,33 +1,16 @@ /* See LICENSE for license details. */ #include "../util.h" -#include "../beamformer_parameters.h" +#include "ogl_beamformer_lib.h" #include "../beamformer_work_queue.c" #define PIPE_RETRY_PERIOD_MS (100ULL) -static BeamformerSharedMemory *g_bp; +global BeamformerSharedMemory *g_bp; #if defined(__linux__) -#include <fcntl.h> -#include <linux/futex.h> -#include <poll.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/syscall.h> -#include <time.h> -#include <unistd.h> - -i64 syscall(i64, ...); - -#define OS_EXPORT_PIPE_NAME "/tmp/beamformer_output_pipe" - +#include "../os_linux.c" #elif defined(_WIN32) - -#define OS_EXPORT_PIPE_NAME "\\\\.\\pipe\\beamformer_output_fifo" - -#define OPEN_EXISTING 3 -#define GENERIC_WRITE 0x40000000 -#define FILE_MAP_ALL_ACCESS 0x000F001F +#include "../os_win32.c" #define PIPE_TYPE_BYTE 0x00 #define PIPE_ACCESS_INBOUND 0x01 @@ -39,50 +22,18 @@ i64 syscall(i64, ...); #define ERROR_PIPE_NOT_CONNECTED 233L #define ERROR_PIPE_LISTENING 536L -#define W32(r) __declspec(dllimport) r __stdcall -W32(b32) CloseHandle(iptr); -W32(iptr) CreateFileA(c8 *, u32, u32, void *, u32, u32, void *); W32(iptr) CreateNamedPipeA(c8 *, u32, u32, u32, u32, u32, u32, void *); W32(b32) DisconnectNamedPipe(iptr); -W32(i32) GetLastError(void); -W32(iptr) MapViewOfFile(iptr, u32, u32, u32, u64); W32(iptr) OpenFileMappingA(u32, b32, c8 *); -W32(b32) ReadFile(iptr, u8 *, i32, 
i32 *, void *); W32(void) Sleep(u32); -W32(void) UnmapViewOfFile(iptr); -W32(b32) WaitOnAddress(void *, void *, uz, i32); -W32(b32) WriteFile(iptr, u8 *, i32, i32 *, void *); #else #error Unsupported Platform #endif -#if defined(MATLAB_CONSOLE) -#define mexErrMsgIdAndTxt mexErrMsgIdAndTxt_800 -#define mexWarnMsgIdAndTxt mexWarnMsgIdAndTxt_800 -void mexErrMsgIdAndTxt(const c8*, c8*, ...); -void mexWarnMsgIdAndTxt(const c8*, c8*, ...); -#define error_tag "ogl_beamformer_lib:error" -#define error_msg(...) mexErrMsgIdAndTxt(error_tag, __VA_ARGS__) -#define warning_msg(...) mexWarnMsgIdAndTxt(error_tag, __VA_ARGS__) -#else -#define error_msg(...) -#define warning_msg(...) -#endif - #if defined(__linux__) -static OS_WAIT_ON_VALUE_FN(os_wait_on_value) -{ - struct timespec *timeout = 0, timeout_value; - if (timeout_ms != -1) { - timeout_value.tv_sec = timeout_ms / 1000; - timeout_value.tv_nsec = (timeout_ms % 1000) * 1000000; - timeout = &timeout_value; - } - return syscall(SYS_futex, value, FUTEX_WAIT, current, timeout, 0, 0) == 0; -} -static Pipe +function Pipe os_open_read_pipe(char *name) { mkfifo(name, 0660); @@ -133,12 +84,14 @@ os_open_shared_memory_area(char *name) #elif defined(_WIN32) -static OS_WAIT_ON_VALUE_FN(os_wait_on_value) +/* TODO(rnp): temporary workaround */ +function OS_WAIT_ON_VALUE_FN(os_wait_on_value_stub) { /* TODO(rnp): this doesn't work across processes on win32 (return 1 to cause a spin wait) */ return 1; return WaitOnAddress(value, &current, sizeof(*value), timeout_ms); } +#define os_wait_on_value os_wait_on_value_stub static Pipe os_open_read_pipe(char *name) @@ -188,14 +141,13 @@ os_wait_read_pipe(Pipe p, void *buf, iz read_size, u32 timeout_ms) return total_read == read_size; } -static BeamformerSharedMemory * +function BeamformerSharedMemory * os_open_shared_memory_area(char *name) { BeamformerSharedMemory *result = 0; iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name); if (h != INVALID_FILE) { - iptr view = MapViewOfFile(h, 
FILE_MAP_ALL_ACCESS, 0, 0, BEAMFORMER_SHARED_MEMORY_SIZE); - result = (BeamformerSharedMemory *)view; + result = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, BEAMFORMER_SHARED_MEMORY_SIZE); CloseHandle(h); } @@ -204,28 +156,26 @@ os_open_shared_memory_area(char *name) #endif -static b32 -check_shared_memory(char *name) +function b32 +check_shared_memory(void) { + b32 result = 1; if (!g_bp) { - g_bp = os_open_shared_memory_area(name); - if (!g_bp) { - error_msg("failed to open shared memory area"); - return 0; - } + g_bp = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME); + if (!g_bp) result = 0; } - return 1; + return result; } b32 -set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count) +set_beamformer_pipeline(i32 *stages, i32 stages_count) { if (stages_count > ARRAY_COUNT(g_bp->compute_stages)) { - error_msg("maximum stage count is %lu", ARRAY_COUNT(g_bp->compute_stages)); + //error_msg("maximum stage count is %lu", ARRAY_COUNT(g_bp->compute_stages)); return 0; } - if (!check_shared_memory(shm_name)) + if (!check_shared_memory()) return 0; for (i32 i = 0; i < stages_count; i++) { @@ -235,7 +185,7 @@ set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count) #undef X if (!valid) { - error_msg("invalid shader stage: %d", stages[i]); + //error_msg("invalid shader stage: %d", stages[i]); return 0; } @@ -247,9 +197,9 @@ set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count) } b32 -beamformer_start_compute(char *shm_name, u32 image_plane_tag) +beamformer_start_compute(u32 image_plane_tag) { - b32 result = image_plane_tag < IPT_LAST && check_shared_memory(shm_name); + b32 result = image_plane_tag < IPT_LAST && check_shared_memory(); if (result) { result = !atomic_load(&g_bp->dispatch_compute_sync); if (result) { @@ -261,10 +211,10 @@ beamformer_start_compute(char *shm_name, u32 image_plane_tag) } function b32 -beamformer_upload_buffer(char *shm_name, void *data, u32 size, i32 store_offset, i32 sync_offset, 
+beamformer_upload_buffer(void *data, u32 size, i32 store_offset, i32 sync_offset, BeamformerUploadKind kind, i32 timeout_ms) { - b32 result = check_shared_memory(shm_name); + b32 result = check_shared_memory(); if (result) { BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue); result = work && try_wait_sync((i32 *)((u8 *)g_bp + sync_offset), timeout_ms, os_wait_on_value); @@ -288,23 +238,23 @@ beamformer_upload_buffer(char *shm_name, void *data, u32 size, i32 store_offset, X(focal_vectors, f32, 2, FOCAL_VECTORS) #define X(name, dtype, elements, command) \ -b32 beamformer_push_##name (char *shm_id, dtype *data, u32 count, i32 timeout_ms) { \ - b32 result = count <= ARRAY_COUNT(g_bp->name); \ - if (result) { \ - result = beamformer_upload_buffer(shm_id, data, count * elements * sizeof(dtype), \ - offsetof(BeamformerSharedMemory, name), \ - offsetof(BeamformerSharedMemory, name##_sync), \ - BU_KIND_##command, timeout_ms); \ - } \ - return result; \ +b32 beamformer_push_##name (dtype *data, u32 count, i32 timeout_ms) { \ + b32 result = count <= ARRAY_COUNT(g_bp->name); \ + if (result) { \ + result = beamformer_upload_buffer(data, count * elements * sizeof(dtype), \ + offsetof(BeamformerSharedMemory, name), \ + offsetof(BeamformerSharedMemory, name##_sync), \ + BU_KIND_##command, timeout_ms); \ + } \ + return result; \ } BEAMFORMER_UPLOAD_FNS #undef X b32 -beamformer_push_parameters(char *shm_name, BeamformerParameters *bp, i32 timeout_ms) +beamformer_push_parameters(BeamformerParameters *bp, i32 timeout_ms) { - b32 result = beamformer_upload_buffer(shm_name, bp, sizeof(*bp), + b32 result = beamformer_upload_buffer(bp, sizeof(*bp), offsetof(BeamformerSharedMemory, parameters), offsetof(BeamformerSharedMemory, parameters_sync), BU_KIND_PARAMETERS, timeout_ms); @@ -312,11 +262,11 @@ beamformer_push_parameters(char *shm_name, BeamformerParameters *bp, i32 timeout } b32 -beamformer_push_data(char *shm_name, void *data, u32 data_size, i32 timeout_ms) 
+beamformer_push_data(void *data, u32 data_size, i32 timeout_ms) { b32 result = data_size <= BEAMFORMER_MAX_RF_DATA_SIZE; if (result) { - result = beamformer_upload_buffer(shm_name, data, data_size, BEAMFORMER_RF_DATA_OFF, + result = beamformer_upload_buffer(data, data_size, BEAMFORMER_RF_DATA_OFF, offsetof(BeamformerSharedMemory, raw_data_sync), BU_KIND_RF_DATA, timeout_ms); } @@ -324,9 +274,9 @@ beamformer_push_data(char *shm_name, void *data, u32 data_size, i32 timeout_ms) } b32 -beamformer_push_parameters_ui(char *shm_name, BeamformerUIParameters *bp, i32 timeout_ms) +beamformer_push_parameters_ui(BeamformerUIParameters *bp, i32 timeout_ms) { - b32 result = check_shared_memory(shm_name); + b32 result = check_shared_memory(); if (result) { BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue); result = work && try_wait_sync(&g_bp->parameters_ui_sync, timeout_ms, os_wait_on_value); @@ -345,9 +295,9 @@ beamformer_push_parameters_ui(char *shm_name, BeamformerUIParameters *bp, i32 ti } b32 -beamformer_push_parameters_head(char *shm_name, BeamformerParametersHead *bp, i32 timeout_ms) +beamformer_push_parameters_head(BeamformerParametersHead *bp, i32 timeout_ms) { - b32 result = check_shared_memory(shm_name); + b32 result = check_shared_memory(); if (result) { BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue); result = work && try_wait_sync(&g_bp->parameters_head_sync, timeout_ms, os_wait_on_value); @@ -366,27 +316,27 @@ beamformer_push_parameters_head(char *shm_name, BeamformerParametersHead *bp, i3 } b32 -set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *new_bp) +set_beamformer_parameters(BeamformerParametersV0 *new_bp) { b32 result = 0; - result |= beamformer_push_channel_mapping(shm_name, (i16 *)new_bp->channel_mapping, - ARRAY_COUNT(new_bp->channel_mapping), 0); - result |= beamformer_push_sparse_elements(shm_name, (i16 *)new_bp->uforces_channels, - ARRAY_COUNT(new_bp->uforces_channels), 0); + result |= 
beamformer_push_channel_mapping((i16 *)new_bp->channel_mapping, + countof(new_bp->channel_mapping), 0); + result |= beamformer_push_sparse_elements((i16 *)new_bp->uforces_channels, + countof(new_bp->uforces_channels), 0); v2 focal_vectors[256]; for (u32 i = 0; i < ARRAY_COUNT(focal_vectors); i++) focal_vectors[i] = (v2){{new_bp->transmit_angles[i], new_bp->focal_depths[i]}}; - result |= beamformer_push_focal_vectors(shm_name, (f32 *)focal_vectors, ARRAY_COUNT(focal_vectors), 0); - result |= beamformer_push_parameters(shm_name, (BeamformerParameters *)&new_bp->xdc_transform, 0); + result |= beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors), 0); + result |= beamformer_push_parameters((BeamformerParameters *)&new_bp->xdc_transform, 0); return result; } b32 -send_data(char *pipe_name, char *shm_name, void *data, u32 data_size) +send_data(void *data, u32 data_size) { - b32 result = beamformer_push_data(shm_name, data, data_size, 0); + b32 result = beamformer_push_data(data, data_size, 0); if (result) { - if (beamformer_start_compute(shm_name, 0)) { + if (beamformer_start_compute(0)) { /* TODO(rnp): should we just set timeout on acquiring the lock instead of this? */ try_wait_sync(&g_bp->raw_data_sync, -1, os_wait_on_value); atomic_store(&g_bp->raw_data_sync, 1); @@ -394,52 +344,41 @@ send_data(char *pipe_name, char *shm_name, void *data, u32 data_size) result = 0; /* TODO(rnp): HACK: this is strictly meant for matlab; we need a real * recovery method. for most (all?) 
old api uses this won't be hit */ - warning_msg("failed to start compute after sending data\n" - "library in a borked state\n" - "try calling beamformer_start_compute()"); + //warning_msg("failed to start compute after sending data\n" + // "library in a borked state\n" + // "try calling beamformer_start_compute()"); } } return result; } b32 -beamform_data_synchronized(char *pipe_name, char *shm_name, void *data, u32 data_size, - uv4 output_points, f32 *out_data, i32 timeout_ms) +beamform_data_synchronized(void *data, u32 data_size, u32 output_points[3], f32 *out_data, i32 timeout_ms) { - if (!check_shared_memory(shm_name)) + if (!check_shared_memory()) return 0; - if (output_points.x == 0) output_points.x = 1; - if (output_points.y == 0) output_points.y = 1; - if (output_points.z == 0) output_points.z = 1; - output_points.w = 1; + output_points[0] = MIN(1, output_points[0]); + output_points[1] = MIN(1, output_points[1]); + output_points[2] = MIN(1, output_points[2]); - g_bp->parameters.output_points.x = output_points.x; - g_bp->parameters.output_points.y = output_points.y; - g_bp->parameters.output_points.z = output_points.z; - g_bp->export_next_frame = 1; - - s8 export_name = s8(OS_EXPORT_PIPE_NAME); - if (export_name.len > ARRAY_COUNT(g_bp->export_pipe_name)) { - error_msg("export pipe name too long"); - return 0; - } + g_bp->parameters.output_points[0] = output_points[0]; + g_bp->parameters.output_points[1] = output_points[1]; + g_bp->parameters.output_points[2] = output_points[2]; + g_bp->export_next_frame = 1; Pipe export_pipe = os_open_read_pipe(OS_EXPORT_PIPE_NAME); if (export_pipe.file == INVALID_FILE) { - error_msg("failed to open export pipe"); + //error_msg("failed to open export pipe"); return 0; } - for (u32 i = 0; i < export_name.len; i++) - g_bp->export_pipe_name[i] = export_name.data[i]; - - b32 result = send_data(pipe_name, shm_name, data, data_size); + b32 result = send_data(data, data_size); if (result) { - iz output_size = output_points.x * 
output_points.y * output_points.z * sizeof(f32) * 2; + iz output_size = output_points[0] * output_points[1] * output_points[2] * sizeof(f32) * 2; result = os_wait_read_pipe(export_pipe, out_data, output_size, timeout_ms); - if (!result) - warning_msg("failed to read full export data from pipe"); + //if (!result) + // warning_msg("failed to read full export data from pipe"); } os_disconnect_pipe(export_pipe); diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h @@ -1,17 +1,5 @@ /* See LICENSE for license details. */ -#include <stddef.h> #include <stdint.h> - -typedef int16_t i16; -typedef uint16_t u16; -typedef int32_t i32; -typedef uint32_t u32; -typedef uint32_t b32; -typedef float f32; -typedef struct { u32 x, y; } uv2; -typedef struct { u32 x, y, z, w; } uv4; -typedef struct { f32 x, y, z, w; } v4; - #include "../beamformer_parameters.h" #if defined(_WIN32) @@ -22,22 +10,22 @@ typedef struct { f32 x, y, z, w; } v4; /* IMPORTANT: timeout of -1 will block forever */ -LIB_FN b32 set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *); -LIB_FN b32 set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count); -LIB_FN b32 send_data(char *pipe_name, char *shm_name, void *data, u32 data_size); - +LIB_FN uint32_t set_beamformer_parameters(BeamformerParametersV0 *); +LIB_FN uint32_t set_beamformer_pipeline(int32_t *stages, int32_t stages_count); +LIB_FN uint32_t send_data(void *data, uint32_t data_size); /* NOTE: sends data and waits for (complex) beamformed data to be returned. - * out_data: must be allocated by the caller as 2 f32s per output point. */ -LIB_FN b32 beamform_data_synchronized(char *pipe_name, char *shm_name, void *data, u32 data_size, - uv4 output_points, f32 *out_data, i32 timeout_ms); + * out_data: must be allocated by the caller as 2 floats per output point. 
*/ +LIB_FN uint32_t beamform_data_synchronized(void *data, uint32_t data_size, uint32_t output_points[3], + float *out_data, int32_t timeout_ms); -LIB_FN b32 beamformer_start_compute(char *shm_name, u32 image_plane_tag); +LIB_FN uint32_t beamformer_start_compute(uint32_t image_plane_tag); /* NOTE: these functions only queue an upload; you must flush (old data functions or start_compute) */ -LIB_FN b32 beamformer_push_data(char *shm_name, void *data, u32 data_size, i32 timeout_ms); -LIB_FN b32 beamformer_push_channel_mapping(char *shm_name, i16 *mapping, u32 count, i32 timeout_ms); -LIB_FN b32 beamformer_push_sparse_elements(char *shm_name, i16 *elements, u32 count, i32 timeout_ms); -LIB_FN b32 beamformer_push_focal_vectors(char *shm_name, f32 *vectors, u32 count, i32 timeout_ms); -LIB_FN b32 beamformer_push_parameters(char *shm_name, BeamformerParameters *bp, i32 timeout_ms); -LIB_FN b32 beamformer_push_parameters_ui(char *shm_name, BeamformerUIParameters *, i32 timeout_ms); -LIB_FN b32 beamformer_push_parameters_head(char *shm_name, BeamformerParametersHead *, i32 timeout_ms); +LIB_FN uint32_t beamformer_push_data(void *data, uint32_t size, int32_t timeout_ms); +LIB_FN uint32_t beamformer_push_channel_mapping(int16_t *mapping, uint32_t count, int32_t timeout_ms); +LIB_FN uint32_t beamformer_push_sparse_elements(int16_t *elements, uint32_t count, int32_t timeout_ms); +LIB_FN uint32_t beamformer_push_focal_vectors(float *vectors, uint32_t count, int32_t timeout_ms); + +LIB_FN uint32_t beamformer_push_parameters(BeamformerParameters *, int32_t timeout_ms); +LIB_FN uint32_t beamformer_push_parameters_ui(BeamformerUIParameters *, int32_t timeout_ms); +LIB_FN uint32_t beamformer_push_parameters_head(BeamformerParametersHead *, int32_t timeout_ms); diff --git a/main_linux.c b/main_linux.c @@ -15,8 +15,6 @@ #define OS_RENDERDOC_SONAME "librenderdoc.so" -#define OS_SMEM_NAME "/ogl_beamformer_shared_memory" - #define OS_PATH_SEPERATOR "/" #include "static.c" @@ -75,6 +73,7 
@@ main(void) ctx.os.file_watch_context.handle = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); ctx.os.compute_worker.asleep = 1; ctx.os.stderr = STDERR_FILENO; + ctx.os.export_pipe_name = OS_EXPORT_PIPE_NAME; debug_init(&ctx.os, (iptr)&input, &temp_memory); setup_beamformer(&ctx, &temp_memory); @@ -99,5 +98,5 @@ main(void) /* NOTE: make sure this will get cleaned up after external * programs release their references */ - shm_unlink(OS_SMEM_NAME); + shm_unlink(OS_SHARED_MEMORY_NAME); } diff --git a/main_w32.c b/main_w32.c @@ -15,8 +15,6 @@ #define OS_RENDERDOC_SONAME "renderdoc.dll" -#define OS_SMEM_NAME "Local\\ogl_beamformer_parameters" - #define OS_PATH_SEPERATOR "\\" #include "static.c" @@ -109,6 +107,7 @@ main(void) ctx.os.context = (iptr)&w32_ctx; ctx.os.compute_worker.asleep = 1; ctx.os.stderr = GetStdHandle(STD_ERROR_HANDLE); + ctx.os.export_pipe_name = OS_EXPORT_PIPE_NAME; debug_init(&ctx.os, (iptr)&input, &temp_memory); setup_beamformer(&ctx, &temp_memory); diff --git a/os_linux.c b/os_linux.c @@ -3,6 +3,9 @@ /* NOTE(rnp): provides the platform layer for the beamformer. This code must * be provided by any platform the beamformer is ported to. 
*/ +#define OS_SHARED_MEMORY_NAME "/ogl_beamformer_shared_memory" +#define OS_EXPORT_PIPE_NAME "/tmp/beamformer_output_pipe" + #include "util.h" #include <dlfcn.h> @@ -138,19 +141,17 @@ static OS_READ_FILE_FN(os_read_file) return total_read; } -static void * -os_open_shared_memory_area(char *name, iz cap) +function void * +os_create_shared_memory_area(char *name, iz cap) { void *result = 0; i32 fd = shm_open(name, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); - if (fd > 0) { - if (ftruncate(fd, cap) != -1) { - void *new = mmap(NULL, cap, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (new != MAP_FAILED) - result = new; - } - close(fd); + if (fd > 0 && ftruncate(fd, cap) != -1) { + void *new = mmap(NULL, cap, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (new != MAP_FAILED) + result = new; } + if (fd > 0) close(fd); return result; } diff --git a/os_win32.c b/os_win32.c @@ -1,4 +1,8 @@ /* See LICENSE for license details. */ + +#define OS_SHARED_MEMORY_NAME "Local\\ogl_beamformer_parameters" +#define OS_EXPORT_PIPE_NAME "\\\\.\\pipe\\beamformer_output_pipe" + #include "util.h" #define STD_INPUT_HANDLE -10 @@ -244,8 +248,8 @@ os_file_exists(char *path) return result; } -static void * -os_open_shared_memory_area(char *name, iz cap) +function void * +os_create_shared_memory_area(char *name, iz cap) { void *result = 0; iptr h = CreateFileMappingA(-1, 0, PAGE_READWRITE, 0, cap, name); diff --git a/static.c b/static.c @@ -323,7 +323,7 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory) ctx->beamform_work_queue = push_struct(memory, BeamformWorkQueue); - ctx->shared_memory = os_open_shared_memory_area(OS_SMEM_NAME, BEAMFORMER_SHARED_MEMORY_SIZE); + ctx->shared_memory = os_create_shared_memory_area(OS_SHARED_MEMORY_NAME, BEAMFORMER_SHARED_MEMORY_SIZE); if (!ctx->shared_memory) os_fatal(s8("Get more ram lol\n")); mem_clear(ctx->shared_memory, 0, sizeof(*ctx->shared_memory)); diff --git a/ui.c b/ui.c @@ -923,11 +923,11 @@ add_beamformer_parameters_view(Variable *parent, BeamformerCtx 
*ctx) group = add_variable_group(ui, group, &ui->arena, s8("Lateral Extent:"), VG_V2, ui->font); { add_beamformer_variable_f32(ui, group, &ui->arena, s8("Min:"), s8("[mm]"), - &bp->output_min_coordinate.x, v2_inf, 1e3, 0.5e-3, + bp->output_min_coordinate + 0, v2_inf, 1e3, 0.5e-3, V_INPUT|V_TEXT|V_CAUSES_COMPUTE, ui->font); add_beamformer_variable_f32(ui, group, &ui->arena, s8("Max:"), s8("[mm]"), - &bp->output_max_coordinate.x, v2_inf, 1e3, 0.5e-3, + bp->output_max_coordinate + 0, v2_inf, 1e3, 0.5e-3, V_INPUT|V_TEXT|V_CAUSES_COMPUTE, ui->font); } group = end_variable_group(group); @@ -935,11 +935,11 @@ add_beamformer_parameters_view(Variable *parent, BeamformerCtx *ctx) group = add_variable_group(ui, group, &ui->arena, s8("Axial Extent:"), VG_V2, ui->font); { add_beamformer_variable_f32(ui, group, &ui->arena, s8("Min:"), s8("[mm]"), - &bp->output_min_coordinate.z, v2_inf, 1e3, 0.5e-3, + bp->output_min_coordinate + 2, v2_inf, 1e3, 0.5e-3, V_INPUT|V_TEXT|V_CAUSES_COMPUTE, ui->font); add_beamformer_variable_f32(ui, group, &ui->arena, s8("Max:"), s8("[mm]"), - &bp->output_max_coordinate.z, v2_inf, 1e3, 0.5e-3, + bp->output_max_coordinate + 2, v2_inf, 1e3, 0.5e-3, V_INPUT|V_TEXT|V_CAUSES_COMPUTE, ui->font); } group = end_variable_group(group); @@ -1085,10 +1085,10 @@ ui_fill_live_frame_view(BeamformerUI *ui, BeamformerFrameView *bv) { ScaleBar *lateral = &bv->lateral_scale_bar.u.scale_bar; ScaleBar *axial = &bv->axial_scale_bar.u.scale_bar; - lateral->min_value = &ui->params.output_min_coordinate.x; - lateral->max_value = &ui->params.output_max_coordinate.x; - axial->min_value = &ui->params.output_min_coordinate.z; - axial->max_value = &ui->params.output_max_coordinate.z; + lateral->min_value = ui->params.output_min_coordinate + 0; + lateral->max_value = ui->params.output_max_coordinate + 0; + axial->min_value = ui->params.output_min_coordinate + 2; + axial->max_value = ui->params.output_max_coordinate + 2; bv->axial_scale_bar_active->u.b32 = 1; 
bv->lateral_scale_bar_active->u.b32 = 1; bv->ctx = ui->frame_view_render_context; @@ -1165,15 +1165,15 @@ view_update(BeamformerUI *ui, BeamformerFrameView *view) view->needs_update |= view->frame != ui->latest_plane[index]; view->frame = ui->latest_plane[index]; if (view->needs_update) { - view->min_coordinate = ui->params.output_min_coordinate; - view->max_coordinate = ui->params.output_max_coordinate; + view->min_coordinate = v4_from_f32_array(ui->params.output_min_coordinate); + view->max_coordinate = v4_from_f32_array(ui->params.output_max_coordinate); } } /* TODO(rnp): x-z or y-z */ /* TODO(rnp): add method of setting a target size in frame view */ uv2 current = view->texture_dim; - uv2 target = {.w = ui->params.output_points.x, .h = ui->params.output_points.z}; + uv2 target = {.w = ui->params.output_points[0], .h = ui->params.output_points[2]}; if (view->type != FVT_COPY && !uv2_equal(current, target) && !uv2_equal(target, (uv2){0})) { resize_frame_view(view, target); view->needs_update = 1; @@ -2808,16 +2808,16 @@ ui_init(BeamformerCtx *ctx, Arena store) ASSERT(ui->arena.beg - (u8 *)ui < KB(64)); } -static void +function void validate_ui_parameters(BeamformerUIParameters *p) { - if (p->output_min_coordinate.x > p->output_max_coordinate.x) - SWAP(p->output_min_coordinate.x, p->output_max_coordinate.x) - if (p->output_min_coordinate.z > p->output_max_coordinate.z) - SWAP(p->output_min_coordinate.z, p->output_max_coordinate.z) + if (p->output_min_coordinate[0] > p->output_max_coordinate[0]) + SWAP(p->output_min_coordinate[0], p->output_max_coordinate[0]) + if (p->output_min_coordinate[2] > p->output_max_coordinate[2]) + SWAP(p->output_min_coordinate[2], p->output_max_coordinate[2]) } -static void +function void draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw, ImagePlaneTag frame_plane, ComputeShaderStats *latest_compute_stats) { diff --git a/util.c b/util.c @@ -649,13 +649,35 @@ magnitude_v2(v2 a) return result; } -static b32 
+function uv4 +uv4_from_u32_array(u32 v[4]) +{ + uv4 result; + result.E[0] = v[0]; + result.E[1] = v[1]; + result.E[2] = v[2]; + result.E[3] = v[3]; + return result; +} + +function b32 uv4_equal(uv4 a, uv4 b) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; } -static v4 +function v4 +v4_from_f32_array(f32 v[4]) +{ + v4 result; + result.E[0] = v[0]; + result.E[1] = v[1]; + result.E[2] = v[2]; + result.E[3] = v[3]; + return result; +} + +function v4 sub_v4(v4 a, v4 b) { v4 result; diff --git a/util.h b/util.h @@ -316,6 +316,8 @@ struct OS { iptr stderr; GLWorkerThreadContext compute_worker; + char *export_pipe_name; + DEBUG_DECL(renderdoc_start_frame_capture_fn *start_frame_capture); DEBUG_DECL(renderdoc_end_frame_capture_fn *end_frame_capture); };