ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 7e351ca34f89675e39ee52a4d516a87c1bb17e15
Parent: c075fc32ff89c97466394030b1075a7156a0de88
Author: Randy Palamar
Date:   Sun,  6 Apr 2025 12:57:38 -0600

core/lib: upload sparse elements as a 1D i16 texture

no more bit manipulation/integer unpacking

Diffstat:
Mbeamformer.c | 13+++++++++++++
Mbeamformer.h | 1+
Mbeamformer_parameters.h | 1-
Mhelpers/ogl_beamformer_lib.c | 50+++++++++++++++++++++++++++++++++++++++-----------
Mhelpers/ogl_beamformer_lib.h | 1+
Mshaders/das.glsl | 47++++++++++++++++++++---------------------------
Mstatic.c | 1+
7 files changed, 75 insertions(+), 39 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -347,6 +347,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame, case CS_DAS: { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]); glBindImageTexture(0, frame->frame.texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F); + glBindImageTexture(1, csctx->sparse_elements_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16I); #if 1 /* TODO(rnp): compute max_points_per_dispatch based on something like a @@ -614,6 +615,18 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co break; } } break; + case BW_UPLOAD_SPARSE_ELEMENTS: { + ASSERT(!atomic_load(&ctx->shared_memory->sparse_elements_sync)); + if (!cs->sparse_elements_texture) { + glCreateTextures(GL_TEXTURE_1D, 1, &cs->sparse_elements_texture); + glTextureStorage1D(cs->sparse_elements_texture, 1, GL_R16I, + ARRAY_COUNT(sm->sparse_elements)); + LABEL_GL_OBJECT(GL_TEXTURE, cs->sparse_elements_texture, s8("Sparse_Elements")); + } + glTextureSubImage1D(cs->sparse_elements_texture, 0, 0, + ARRAY_COUNT(sm->sparse_elements), GL_RED_INTEGER, + GL_SHORT, sm->sparse_elements); + } break; case BW_COMPUTE: { atomic_store(&cs->processing_compute, 1); start_renderdoc_capture(gl_context); diff --git a/beamformer.h b/beamformer.h @@ -87,6 +87,7 @@ typedef struct { u32 shared_ubo; u32 channel_mapping_texture; + u32 sparse_elements_texture; f32 processing_progress; b32 processing_compute; diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -69,7 +69,6 @@ typedef enum { X(time_offset, f32, , float, , "/* pulse length correction time [s] */") #define BEAMFORMER_PARAMS_HEAD \ - X(uforces_channels, u16, [256], uvec4, [32], "/* Channels used for virtual UFORCES elements */") \ X(focal_depths, f32, [256], vec4, [64], "/* [m] Focal Depths for each transmit of a RCA imaging scheme*/") \ X(transmit_angles, f32, [256], vec4, [64], "/* [radians] Transmit Angles for each transmit of a RCA imaging scheme*/") \ X(xdc_transform, f32, [16] , mat4, , "/* IMPORTANT: column major order */") \ diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c @@ -211,6 +211,24 @@ os_open_shared_memory_area(char *name) #endif static b32 +try_wait_sync(i32 *sync, i32 timeout_ms) +{ + b32 result = 0; + for (;;) { + i32 current = atomic_load(sync); + if (current) { + atomic_inc(sync, -current); + result = 1; + break; + } else if (!timeout_ms) { + break; + } + os_wait_on_value(sync, 0, timeout_ms); + } + return result; +} + +static b32 check_shared_memory(char *name) { if (!g_bp) { @@ -258,20 +276,28 @@ beamformer_push_channel_mapping(char *shm_name, i16 *mapping, u32 count, i32 tim b32 result = check_shared_memory(shm_name) && count <= ARRAY_COUNT(g_bp->channel_mapping); if (result) { BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue); - if (work) { - /* TODO(rnp): refactor */ - for (;;) { - i32 current = atomic_load(&g_bp->channel_mapping_sync); - if (current) { - atomic_inc(&g_bp->channel_mapping_sync, -current); - break; - } - os_wait_on_value(&g_bp->channel_mapping_sync, 0, timeout_ms); - } + result = work && try_wait_sync(&g_bp->channel_mapping_sync, timeout_ms); + if (result) { work->type = BW_UPLOAD_CHANNEL_MAPPING; work->completion_barrier = offsetof(BeamformerSharedMemory, channel_mapping_sync); mem_copy(g_bp->channel_mapping, mapping, count * sizeof(*mapping)); + beamform_work_queue_push_commit(&g_bp->external_work_queue); + } + } + return result; +} +b32 +beamformer_push_sparse_elements(char *shm_name, i16 *elements, u32 count, i32 timeout_ms) +{ + b32 result = check_shared_memory(shm_name) && count <= ARRAY_COUNT(g_bp->sparse_elements); + if (result) { + BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue); + result = work && try_wait_sync(&g_bp->sparse_elements_sync, timeout_ms); + if (result) { + work->type = BW_UPLOAD_SPARSE_ELEMENTS; + work->completion_barrier = offsetof(BeamformerSharedMemory, sparse_elements_sync); + mem_copy(g_bp->sparse_elements, elements, count * sizeof(*elements)); beamform_work_queue_push_commit(&g_bp->external_work_queue); } } @@ -284,8 +310,10 @@ set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *new_bp) b32 result = 0; result |= beamformer_push_channel_mapping(shm_name, (i16 *)new_bp->channel_mapping, ARRAY_COUNT(new_bp->channel_mapping), 0); + result |= beamformer_push_sparse_elements(shm_name, (i16 *)new_bp->uforces_channels, + ARRAY_COUNT(new_bp->uforces_channels), 0); if (result) { - mem_copy(&g_bp->raw, &new_bp->uforces_channels, sizeof(BeamformerParameters)); + mem_copy(&g_bp->raw, &new_bp->focal_depths, sizeof(g_bp->raw)); g_bp->upload = 1; } diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h @@ -34,3 +34,4 @@ LIB_FN b32 beamform_data_synchronized(char *pipe_name, char *shm_name, void *dat /* NOTE: these functions only queue an upload; you must flush (for now via one of the data functions) */ LIB_FN b32 beamformer_push_channel_mapping(char *shm_name, i16 *mapping, u32 count, i32 timeout_ms); +LIB_FN b32 beamformer_push_sparse_elements(char *shm_name, i16 *elements, u32 count, i32 timeout_ms); diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -3,7 +3,8 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 { vec2 rf_data[]; }; -layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex; +layout(rg32f, binding = 0) writeonly restrict uniform image3D u_out_data_tex; +layout(r16i, binding = 1) readonly restrict uniform iimage1D sparse_elements; layout(location = 2) uniform ivec3 u_voxel_offset; layout(location = 3) uniform uint u_cycle_t; @@ -17,7 +18,7 @@ layout(location = 3) uniform uint u_cycle_t; #define TX_MODE_RX_COLS(a) (((a) & 1) != 0) /* NOTE: See: https://cubic.org/docs/hermite.htm */ -vec2 cubic(uint ridx, float x) +vec2 cubic(int ridx, float x) { mat4 h = mat4( 2, -3, 0, 1, @@ -26,7 +27,7 @@ vec2 cubic(uint ridx, float x) 1, -1, 0, 0 ); - uint xk = uint(floor(x)); + int xk = int(floor(x)); float t = (x - float(xk)); vec4 S = vec4(t * t * t, t * t, t, 1); @@ -40,11 +41,11 @@ vec2 cubic(uint ridx, float x) return vec2(dot(S, h * C1), dot(S, h * C2)); } -vec2 sample_rf(uint ridx, float t) +vec2 sample_rf(int ridx, float t) { vec2 result; if (interpolate) result = cubic(ridx, t); - else result = rf_data[ridx + uint(floor(t))]; + else result = rf_data[ridx + int(floor(t))]; return result; } @@ -109,8 +110,8 @@ float cylindricalwave_transmit_distance(vec3 point, float focal_depth, float tra vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg) { - uint ridx = 0; - int direction = beamform_plane; + int ridx = 0; + int direction = beamform_plane; if (direction != TX_ROWS) image_point = image_point.yxz; bool tx_col = TX_MODE_TX_COLS(transmit_mode); @@ -146,8 +147,8 @@ vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg) float sidx = sample_index(transmit_distance + length(receive_distance)); vec2 valid = vec2(sidx >= 0) * vec2(sidx < dec_data_dim.x); sum += apodize(sample_rf(ridx, sidx), apodization_arg, length(receive_distance.xy)) * valid; - receive_distance -= delta; - ridx += dec_data_dim.x; + receive_distance -= delta; + ridx += int(dec_data_dim.x); } } return sum; @@ -155,8 +156,8 @@ vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg) vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg) { - uint uhercules = uint(das_shader_id == DAS_ID_UHERCULES); - uint ridx = dec_data_dim.y * dec_data_dim.x * uhercules; + int uhercules = int(das_shader_id == DAS_ID_UHERCULES); + int ridx = int(dec_data_dim.y * dec_data_dim.x * uhercules); int direction = beamform_plane; if (direction != TX_ROWS) image_point = image_point.yxz; @@ -179,12 +180,8 @@ vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg) vec2 sum = vec2(0); /* NOTE: For Each Acquistion in Raw Data */ - for (uint i = uhercules; i < dec_data_dim.z; i++) { - uint base_idx = ((i - uhercules) / 8); - uint sub_idx = ((i - uhercules) % 8) / 2; - uint shift = (~(i - uhercules) & 1u) * 16u; - uint channel = (uforces_channels[base_idx][sub_idx] << shift) >> 16u; - + for (int i = uhercules; i < dec_data_dim.z; i++) { + int channel = imageLoad(sparse_elements, i - uhercules).x; /* NOTE: For Each Virtual Source */ for (uint j = 0; j < dec_data_dim.y; j++) { vec3 element_position; @@ -199,7 +196,7 @@ vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg) sum += apodize(sample_rf(ridx, sidx), apodization_arg, length(receive_distance.xy)) * valid; - ridx += dec_data_dim.x; + ridx += int(dec_data_dim.x); } } return sum; @@ -208,8 +205,8 @@ vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg) vec2 uFORCES(vec3 image_point, vec3 delta, float apodization_arg) { /* NOTE: skip first acquisition in uforces since its garbage */ - uint uforces = uint(das_shader_id == DAS_ID_UFORCES); - uint ridx = dec_data_dim.y * dec_data_dim.x * uforces; + int uforces = int(das_shader_id == DAS_ID_UFORCES); + int ridx = int(dec_data_dim.y * dec_data_dim.x * uforces); image_point = (xdc_transform * vec4(image_point, 1)).xyz; @@ -217,12 +214,8 @@ vec2 uFORCES(vec3 image_point, vec3 delta, float apodization_arg) delta.y = 0; vec2 sum = vec2(0); - for (uint i = uforces; i < dec_data_dim.z; i++) { - uint base_idx = ((i - uforces) / 8); - uint sub_idx = ((i - uforces) % 8) / 2; - uint shift = (~(i - uforces) & 1u) * 16u; - uint channel = (uforces_channels[base_idx][sub_idx] << shift) >> 16u; - + for (int i = uforces; i < dec_data_dim.z; i++) { + int channel = imageLoad(sparse_elements, i - uforces).x; vec2 rdist = vec2(image_point.x, image_point.z); vec3 focal_point = channel * delta + focal_point_offset; float transmit_dist = distance(image_point, focal_point); @@ -232,7 +225,7 @@ vec2 uFORCES(vec3 image_point, vec3 delta, float apodization_arg) vec2 valid = vec2(sidx >= 0) * vec2(sidx < dec_data_dim.x); sum += apodize(sample_rf(ridx, sidx), apodization_arg, rdist.x) * valid; rdist.x -= delta.x; - ridx += dec_data_dim.x; + ridx += int(dec_data_dim.x); } } return sum; diff --git a/static.c b/static.c @@ -308,6 +308,7 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory) /* TODO(rnp): refactor - this is annoying */ ctx->shared_memory->raw_data_sync = 1; ctx->shared_memory->channel_mapping_sync = 1; + ctx->shared_memory->sparse_elements_sync = 1; /* NOTE: default compute shader pipeline */ ctx->shared_memory->compute_stages[0] = CS_DECODE;