Commit: 7e351ca34f89675e39ee52a4d516a87c1bb17e15
Parent: c075fc32ff89c97466394030b1075a7156a0de88
Author: Randy Palamar
Date: Sun, 6 Apr 2025 12:57:38 -0600
core/lib: upload sparse elements as a 1D i16 texture
no more bit manipulation/integer unpacking
Diffstat:
7 files changed, 75 insertions(+), 39 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -347,6 +347,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame,
case CS_DAS: {
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
glBindImageTexture(0, frame->frame.texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
+ glBindImageTexture(1, csctx->sparse_elements_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16I);
#if 1
/* TODO(rnp): compute max_points_per_dispatch based on something like a
@@ -614,6 +615,18 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
break;
}
} break;
+ case BW_UPLOAD_SPARSE_ELEMENTS: {
+ ASSERT(!atomic_load(&ctx->shared_memory->sparse_elements_sync));
+ if (!cs->sparse_elements_texture) {
+ glCreateTextures(GL_TEXTURE_1D, 1, &cs->sparse_elements_texture);
+ glTextureStorage1D(cs->sparse_elements_texture, 1, GL_R16I,
+ ARRAY_COUNT(sm->sparse_elements));
+ LABEL_GL_OBJECT(GL_TEXTURE, cs->sparse_elements_texture, s8("Sparse_Elements"));
+ }
+ glTextureSubImage1D(cs->sparse_elements_texture, 0, 0,
+ ARRAY_COUNT(sm->sparse_elements), GL_RED_INTEGER,
+ GL_SHORT, sm->sparse_elements);
+ } break;
case BW_COMPUTE: {
atomic_store(&cs->processing_compute, 1);
start_renderdoc_capture(gl_context);
diff --git a/beamformer.h b/beamformer.h
@@ -87,6 +87,7 @@ typedef struct {
u32 shared_ubo;
u32 channel_mapping_texture;
+ u32 sparse_elements_texture;
f32 processing_progress;
b32 processing_compute;
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -69,7 +69,6 @@ typedef enum {
X(time_offset, f32, , float, , "/* pulse length correction time [s] */")
#define BEAMFORMER_PARAMS_HEAD \
- X(uforces_channels, u16, [256], uvec4, [32], "/* Channels used for virtual UFORCES elements */") \
X(focal_depths, f32, [256], vec4, [64], "/* [m] Focal Depths for each transmit of a RCA imaging scheme*/") \
X(transmit_angles, f32, [256], vec4, [64], "/* [radians] Transmit Angles for each transmit of a RCA imaging scheme*/") \
X(xdc_transform, f32, [16] , mat4, , "/* IMPORTANT: column major order */") \
diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c
@@ -211,6 +211,24 @@ os_open_shared_memory_area(char *name)
#endif
static b32
+try_wait_sync(i32 *sync, i32 timeout_ms)
+{
+ b32 result = 0;
+ for (;;) {
+ i32 current = atomic_load(sync);
+ if (current) {
+ atomic_inc(sync, -current);
+ result = 1;
+ break;
+ } else if (!timeout_ms) {
+ break;
+ }
+ os_wait_on_value(sync, 0, timeout_ms);
+ }
+ return result;
+}
+
+static b32
check_shared_memory(char *name)
{
if (!g_bp) {
@@ -258,20 +276,28 @@ beamformer_push_channel_mapping(char *shm_name, i16 *mapping, u32 count, i32 tim
b32 result = check_shared_memory(shm_name) && count <= ARRAY_COUNT(g_bp->channel_mapping);
if (result) {
BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue);
- if (work) {
- /* TODO(rnp): refactor */
- for (;;) {
- i32 current = atomic_load(&g_bp->channel_mapping_sync);
- if (current) {
- atomic_inc(&g_bp->channel_mapping_sync, -current);
- break;
- }
- os_wait_on_value(&g_bp->channel_mapping_sync, 0, timeout_ms);
- }
+ result = work && try_wait_sync(&g_bp->channel_mapping_sync, timeout_ms);
+ if (result) {
work->type = BW_UPLOAD_CHANNEL_MAPPING;
work->completion_barrier = offsetof(BeamformerSharedMemory, channel_mapping_sync);
mem_copy(g_bp->channel_mapping, mapping, count * sizeof(*mapping));
+ beamform_work_queue_push_commit(&g_bp->external_work_queue);
+ }
+ }
+ return result;
+}
+b32
+beamformer_push_sparse_elements(char *shm_name, i16 *elements, u32 count, i32 timeout_ms)
+{
+ b32 result = check_shared_memory(shm_name) && count <= ARRAY_COUNT(g_bp->sparse_elements);
+ if (result) {
+ BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue);
+ result = work && try_wait_sync(&g_bp->sparse_elements_sync, timeout_ms);
+ if (result) {
+ work->type = BW_UPLOAD_SPARSE_ELEMENTS;
+ work->completion_barrier = offsetof(BeamformerSharedMemory, sparse_elements_sync);
+ mem_copy(g_bp->sparse_elements, elements, count * sizeof(*elements));
beamform_work_queue_push_commit(&g_bp->external_work_queue);
}
}
@@ -284,8 +310,10 @@ set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *new_bp)
b32 result = 0;
result |= beamformer_push_channel_mapping(shm_name, (i16 *)new_bp->channel_mapping,
ARRAY_COUNT(new_bp->channel_mapping), 0);
+ result |= beamformer_push_sparse_elements(shm_name, (i16 *)new_bp->uforces_channels,
+ ARRAY_COUNT(new_bp->uforces_channels), 0);
if (result) {
- mem_copy(&g_bp->raw, &new_bp->uforces_channels, sizeof(BeamformerParameters));
+ mem_copy(&g_bp->raw, &new_bp->focal_depths, sizeof(g_bp->raw));
g_bp->upload = 1;
}
diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h
@@ -34,3 +34,4 @@ LIB_FN b32 beamform_data_synchronized(char *pipe_name, char *shm_name, void *dat
/* NOTE: these functions only queue an upload; you must flush (for now via one of the data functions) */
LIB_FN b32 beamformer_push_channel_mapping(char *shm_name, i16 *mapping, u32 count, i32 timeout_ms);
+LIB_FN b32 beamformer_push_sparse_elements(char *shm_name, i16 *elements, u32 count, i32 timeout_ms);
diff --git a/shaders/das.glsl b/shaders/das.glsl
@@ -3,7 +3,8 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 {
vec2 rf_data[];
};
-layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex;
+layout(rg32f, binding = 0) writeonly restrict uniform image3D u_out_data_tex;
+layout(r16i, binding = 1) readonly restrict uniform iimage1D sparse_elements;
layout(location = 2) uniform ivec3 u_voxel_offset;
layout(location = 3) uniform uint u_cycle_t;
@@ -17,7 +18,7 @@ layout(location = 3) uniform uint u_cycle_t;
#define TX_MODE_RX_COLS(a) (((a) & 1) != 0)
/* NOTE: See: https://cubic.org/docs/hermite.htm */
-vec2 cubic(uint ridx, float x)
+vec2 cubic(int ridx, float x)
{
mat4 h = mat4(
2, -3, 0, 1,
@@ -26,7 +27,7 @@ vec2 cubic(uint ridx, float x)
1, -1, 0, 0
);
- uint xk = uint(floor(x));
+ int xk = int(floor(x));
float t = (x - float(xk));
vec4 S = vec4(t * t * t, t * t, t, 1);
@@ -40,11 +41,11 @@ vec2 cubic(uint ridx, float x)
return vec2(dot(S, h * C1), dot(S, h * C2));
}
-vec2 sample_rf(uint ridx, float t)
+vec2 sample_rf(int ridx, float t)
{
vec2 result;
if (interpolate) result = cubic(ridx, t);
- else result = rf_data[ridx + uint(floor(t))];
+ else result = rf_data[ridx + int(floor(t))];
return result;
}
@@ -109,8 +110,8 @@ float cylindricalwave_transmit_distance(vec3 point, float focal_depth, float tra
vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg)
{
- uint ridx = 0;
- int direction = beamform_plane;
+ int ridx = 0;
+ int direction = beamform_plane;
if (direction != TX_ROWS) image_point = image_point.yxz;
bool tx_col = TX_MODE_TX_COLS(transmit_mode);
@@ -146,8 +147,8 @@ vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg)
float sidx = sample_index(transmit_distance + length(receive_distance));
vec2 valid = vec2(sidx >= 0) * vec2(sidx < dec_data_dim.x);
sum += apodize(sample_rf(ridx, sidx), apodization_arg, length(receive_distance.xy)) * valid;
- receive_distance -= delta;
- ridx += dec_data_dim.x;
+ receive_distance -= delta;
+ ridx += int(dec_data_dim.x);
}
}
return sum;
@@ -155,8 +156,8 @@ vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg)
vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg)
{
- uint uhercules = uint(das_shader_id == DAS_ID_UHERCULES);
- uint ridx = dec_data_dim.y * dec_data_dim.x * uhercules;
+ int uhercules = int(das_shader_id == DAS_ID_UHERCULES);
+ int ridx = int(dec_data_dim.y * dec_data_dim.x * uhercules);
int direction = beamform_plane;
if (direction != TX_ROWS) image_point = image_point.yxz;
@@ -179,12 +180,8 @@ vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg)
vec2 sum = vec2(0);
/* NOTE: For Each Acquistion in Raw Data */
- for (uint i = uhercules; i < dec_data_dim.z; i++) {
- uint base_idx = ((i - uhercules) / 8);
- uint sub_idx = ((i - uhercules) % 8) / 2;
- uint shift = (~(i - uhercules) & 1u) * 16u;
- uint channel = (uforces_channels[base_idx][sub_idx] << shift) >> 16u;
-
+ for (int i = uhercules; i < dec_data_dim.z; i++) {
+ int channel = imageLoad(sparse_elements, i - uhercules).x;
/* NOTE: For Each Virtual Source */
for (uint j = 0; j < dec_data_dim.y; j++) {
vec3 element_position;
@@ -199,7 +196,7 @@ vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg)
sum += apodize(sample_rf(ridx, sidx), apodization_arg,
length(receive_distance.xy)) * valid;
- ridx += dec_data_dim.x;
+ ridx += int(dec_data_dim.x);
}
}
return sum;
@@ -208,8 +205,8 @@ vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg)
vec2 uFORCES(vec3 image_point, vec3 delta, float apodization_arg)
{
/* NOTE: skip first acquisition in uforces since its garbage */
- uint uforces = uint(das_shader_id == DAS_ID_UFORCES);
- uint ridx = dec_data_dim.y * dec_data_dim.x * uforces;
+ int uforces = int(das_shader_id == DAS_ID_UFORCES);
+ int ridx = int(dec_data_dim.y * dec_data_dim.x * uforces);
image_point = (xdc_transform * vec4(image_point, 1)).xyz;
@@ -217,12 +214,8 @@ vec2 uFORCES(vec3 image_point, vec3 delta, float apodization_arg)
delta.y = 0;
vec2 sum = vec2(0);
- for (uint i = uforces; i < dec_data_dim.z; i++) {
- uint base_idx = ((i - uforces) / 8);
- uint sub_idx = ((i - uforces) % 8) / 2;
- uint shift = (~(i - uforces) & 1u) * 16u;
- uint channel = (uforces_channels[base_idx][sub_idx] << shift) >> 16u;
-
+ for (int i = uforces; i < dec_data_dim.z; i++) {
+ int channel = imageLoad(sparse_elements, i - uforces).x;
vec2 rdist = vec2(image_point.x, image_point.z);
vec3 focal_point = channel * delta + focal_point_offset;
float transmit_dist = distance(image_point, focal_point);
@@ -232,7 +225,7 @@ vec2 uFORCES(vec3 image_point, vec3 delta, float apodization_arg)
vec2 valid = vec2(sidx >= 0) * vec2(sidx < dec_data_dim.x);
sum += apodize(sample_rf(ridx, sidx), apodization_arg, rdist.x) * valid;
rdist.x -= delta.x;
- ridx += dec_data_dim.x;
+ ridx += int(dec_data_dim.x);
}
}
return sum;
diff --git a/static.c b/static.c
@@ -308,6 +308,7 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory)
/* TODO(rnp): refactor - this is annoying */
ctx->shared_memory->raw_data_sync = 1;
ctx->shared_memory->channel_mapping_sync = 1;
+ ctx->shared_memory->sparse_elements_sync = 1;
/* NOTE: default compute shader pipeline */
ctx->shared_memory->compute_stages[0] = CS_DECODE;