Commit: c075fc32ff89c97466394030b1075a7156a0de88
Parent: 8e2d399de3ad4d383c731cb6575a2c993a9cdb9e
Author: Randy Palamar
Date: Sun, 6 Apr 2025 12:26:52 -0600
core/lib: upload channel mapping as a 1D i16 texture
Diffstat:
8 files changed, 103 insertions(+), 31 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -160,7 +160,7 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
ctx->cuda_lib.register_cuda_buffers(cs->rf_data_ssbos, ARRAY_COUNT(cs->rf_data_ssbos),
cs->raw_data_ssbo);
ctx->cuda_lib.init_cuda_configuration(bp->rf_raw_dim.E, bp->dec_data_dim.E,
- bp->channel_mapping);
+ ctx->shared_memory->channel_mapping);
break;
}
@@ -308,6 +308,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame,
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
glBindImageTexture(0, csctx->hadamard_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8I);
+ glBindImageTexture(1, csctx->channel_mapping_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16I);
glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
ORONE(csctx->dec_data_dim.y / 32),
ORONE(csctx->dec_data_dim.z));
@@ -548,8 +549,9 @@ reload_compute_shader(BeamformerCtx *ctx, s8 path, s8 extra, ComputeShaderReload
static void
complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_context, iz barrier_offset)
{
- ComputeShaderCtx *cs = &ctx->csctx;
- BeamformerParameters *bp = &ctx->shared_memory->raw;
+ ComputeShaderCtx *cs = &ctx->csctx;
+ BeamformerParameters *bp = &ctx->shared_memory->raw;
+ BeamformerSharedMemory *sm = ctx->shared_memory;
BeamformWork *work = beamform_work_queue_pop(q);
while (work) {
@@ -579,6 +581,18 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
#undef X
}
} break;
+ case BW_UPLOAD_CHANNEL_MAPPING: {
+ ASSERT(!atomic_load(&ctx->shared_memory->channel_mapping_sync));
+ if (!cs->channel_mapping_texture) {
+ glCreateTextures(GL_TEXTURE_1D, 1, &cs->channel_mapping_texture);
+ glTextureStorage1D(cs->channel_mapping_texture, 1, GL_R16I,
+ ARRAY_COUNT(sm->channel_mapping));
+ LABEL_GL_OBJECT(GL_TEXTURE, cs->channel_mapping_texture, s8("Channel_Mapping"));
+ }
+ glTextureSubImage1D(cs->channel_mapping_texture, 0, 0,
+ ARRAY_COUNT(sm->channel_mapping), GL_RED_INTEGER,
+ GL_SHORT, sm->channel_mapping);
+ } break;
case BW_UPLOAD_RF_DATA: {
ASSERT(!atomic_load(&ctx->shared_memory->raw_data_sync));
diff --git a/beamformer.h b/beamformer.h
@@ -23,7 +23,7 @@ typedef struct {
b32 executable_reloaded;
} BeamformerInput;
-#define INIT_CUDA_CONFIGURATION_FN(name) void name(u32 *input_dims, u32 *decoded_dims, u16 *channel_mapping)
+#define INIT_CUDA_CONFIGURATION_FN(name) void name(u32 *input_dims, u32 *decoded_dims, i16 *channel_mapping)
typedef INIT_CUDA_CONFIGURATION_FN(init_cuda_configuration_fn);
INIT_CUDA_CONFIGURATION_FN(init_cuda_configuration_stub) {}
@@ -86,6 +86,8 @@ typedef struct {
u32 shared_ubo;
+ u32 channel_mapping_texture;
+
f32 processing_progress;
b32 processing_compute;
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -54,7 +54,7 @@ typedef enum {
X(f_number, f32, , float, , "/* F# (set to 0 to disable) */") \
X(interpolate, b32, , bool, , "/* Perform Cubic Interpolation of RF Samples */")
-#define BEAMFORMER_PARAMS_HEAD \
+#define BEAMFORMER_PARAMS_HEAD_V0 \
X(channel_mapping, u16, [256], uvec4, [32], "/* Transducer Channel to Verasonics Channel */") \
X(uforces_channels, u16, [256], uvec4, [32], "/* Channels used for virtual UFORCES elements */") \
X(focal_depths, f32, [256], vec4, [64], "/* [m] Focal Depths for each transmit of a RCA imaging scheme*/") \
@@ -68,12 +68,33 @@ typedef enum {
X(das_shader_id, u32, , uint, , "") \
X(time_offset, f32, , float, , "/* pulse length correction time [s] */")
+#define BEAMFORMER_PARAMS_HEAD \
+ X(uforces_channels, u16, [256], uvec4, [32], "/* Channels used for virtual UFORCES elements */") \
+ X(focal_depths, f32, [256], vec4, [64], "/* [m] Focal Depths for each transmit of a RCA imaging scheme*/") \
+ X(transmit_angles, f32, [256], vec4, [64], "/* [radians] Transmit Angles for each transmit of a RCA imaging scheme*/") \
+ X(xdc_transform, f32, [16] , mat4, , "/* IMPORTANT: column major order */") \
+ X(dec_data_dim, uv4, , uvec4, , "/* Samples * Channels * Acquisitions; last element ignored */") \
+ X(xdc_element_pitch, f32, [2] , vec2, , "/* [m] Transducer Element Pitch {row, col} */") \
+ X(rf_raw_dim, uv2, , uvec2, , "/* Raw Data Dimensions */") \
+ X(transmit_mode, i32, , int, , "/* Method/Orientation of Transmit */") \
+ X(decode, u32, , uint, , "/* Decode or just reshape data */") \
+ X(das_shader_id, u32, , uint, , "") \
+ X(time_offset, f32, , float, , "/* pulse length correction time [s] */")
+
#define BEAMFORMER_PARAMS_TAIL \
X(readi_group_id, u32, , uint, , "/* Which readi group this data is from */") \
X(readi_group_size, u32, , uint, , "/* Size of readi transmit group */")
#define X(name, type, size, gltype, glsize, comment) type name size;
-typedef struct { BEAMFORMER_UI_PARAMS } BeamformerUIParameters;
+typedef struct { BEAMFORMER_UI_PARAMS } BeamformerUIParameters;
+typedef struct { BEAMFORMER_PARAMS_HEAD_V0 } BeamformerFixedParametersV0;
+
+typedef struct {
+ BEAMFORMER_PARAMS_HEAD_V0
+ BEAMFORMER_UI_PARAMS
+ BEAMFORMER_PARAMS_TAIL
+ f32 _pad[3];
+} BeamformerParametersV0;
/* NOTE: This struct follows the OpenGL std140 layout. DO NOT modify unless you have
* read and understood the rules, particularly with regard to _member alignment_ */
diff --git a/beamformer_work_queue.h b/beamformer_work_queue.h
@@ -73,8 +73,11 @@ typedef struct {
/* TODO(rnp): probably remove this */
c8 export_pipe_name[256];
- u16 channel_mapping[256];
- u16 sparse_elements[256];
+ i32 channel_mapping_sync;
+ i32 sparse_elements_sync;
+
+ i16 channel_mapping[256];
+ i16 sparse_elements[256];
v2 transmit_angles_focal_depths[256];
BeamformWorkQueue external_work_queue;
diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c
@@ -253,15 +253,43 @@ set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count)
}
b32
-set_beamformer_parameters(char *shm_name, BeamformerParameters *new_bp)
+beamformer_push_channel_mapping(char *shm_name, i16 *mapping, u32 count, i32 timeout_ms)
{
- if (!check_shared_memory(shm_name))
- return 0;
+ b32 result = check_shared_memory(shm_name) && count <= ARRAY_COUNT(g_bp->channel_mapping);
+ if (result) {
+ BeamformWork *work = beamform_work_queue_push(&g_bp->external_work_queue);
+ if (work) {
+ /* TODO(rnp): refactor */
+ for (;;) {
+ i32 current = atomic_load(&g_bp->channel_mapping_sync);
+ if (current) {
+ atomic_inc(&g_bp->channel_mapping_sync, -current);
+ break;
+ }
+ os_wait_on_value(&g_bp->channel_mapping_sync, 0, timeout_ms);
+ }
+ work->type = BW_UPLOAD_CHANNEL_MAPPING;
+ work->completion_barrier = offsetof(BeamformerSharedMemory, channel_mapping_sync);
+ mem_copy(g_bp->channel_mapping, mapping, count * sizeof(*mapping));
- mem_copy(&g_bp->raw, new_bp, sizeof(BeamformerParameters));
- g_bp->upload = 1;
+ beamform_work_queue_push_commit(&g_bp->external_work_queue);
+ }
+ }
+ return result;
+}
- return 1;
+b32
+set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *new_bp)
+{
+ b32 result = 0;
+ result |= beamformer_push_channel_mapping(shm_name, (i16 *)new_bp->channel_mapping,
+ ARRAY_COUNT(new_bp->channel_mapping), 0);
+ if (result) {
+ mem_copy(&g_bp->raw, &new_bp->uforces_channels, sizeof(BeamformerParameters));
+ g_bp->upload = 1;
+ }
+
+ return result;
}
b32
diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h
@@ -20,12 +20,17 @@ typedef struct { f32 x, y, z, w; } v4;
#define LIB_FN
#endif
-LIB_FN b32 set_beamformer_parameters(char *shm_name, BeamformerParameters *);
-LIB_FN b32 set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count);
+/* IMPORTANT: for functions that take a timeout_ms, a value of -1 will block forever */
+LIB_FN b32 set_beamformer_parameters(char *shm_name, BeamformerParametersV0 *);
+LIB_FN b32 set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count);
LIB_FN b32 send_data(char *pipe_name, char *shm_name, void *data, u32 data_size);
/* NOTE: sends data and waits for (complex) beamformed data to be returned.
* out_data: must be allocated by the caller as 2 f32s per output point. */
LIB_FN b32 beamform_data_synchronized(char *pipe_name, char *shm_name, void *data, u32 data_size,
uv4 output_points, f32 *out_data, i32 timeout_ms);
+
+/* NOTE: the functions below only queue an upload; nothing is uploaded until you flush (for now via one of the data functions) */
+
+LIB_FN b32 beamformer_push_channel_mapping(char *shm_name, i16 *mapping, u32 count, i32 timeout_ms);
diff --git a/shaders/decode.glsl b/shaders/decode.glsl
@@ -23,9 +23,10 @@ layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
vec2 out_data[];
};
-layout(r8i, binding = 0) readonly restrict uniform iimage2D hadamard;
+layout(r8i, binding = 0) readonly restrict uniform iimage2D hadamard;
+layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping;
-INPUT_DATA_TYPE sample_rf_data(uint index, uint lfs)
+INPUT_DATA_TYPE sample_rf_data(int index, uint lfs)
{
INPUT_DATA_TYPE result;
#if defined(INPUT_DATA_TYPE_FLOAT)
@@ -45,27 +46,23 @@ void main()
* does the dot product with the equivalent row of the hadamard matrix.
* The result is stored to the equivalent row, column index of the output.
*/
- uint time_sample = gl_GlobalInvocationID.x;
- uint channel = gl_GlobalInvocationID.y;
- uint acq = gl_GlobalInvocationID.z;
+ int time_sample = int(gl_GlobalInvocationID.x);
+ int channel = int(gl_GlobalInvocationID.y);
+ int acq = int(gl_GlobalInvocationID.z);
/* NOTE: offsets for storing the results in the output data */
uint out_off = dec_data_dim.x * dec_data_dim.y * acq + dec_data_dim.x * channel + time_sample;
- /* NOTE: channel mapping is stored as u16s so we must do this to extract the final value */
- uint ch_array_idx = (channel / 8);
- uint ch_vec_idx = (channel % 8) / 2;
- uint ch_elem_lfs = ((~channel) & 1u) * 16;
- uint rf_channel = (channel_mapping[ch_array_idx][ch_vec_idx] << ch_elem_lfs) >> 16;
+ int rf_channel = imageLoad(channel_mapping, channel).x;
/* NOTE: stride is the number of samples between acquisitions; off is the
* index of the first acquisition for this channel and time sample */
- uint rf_stride = dec_data_dim.x;
- uint rf_off = rf_raw_dim.x * rf_channel + time_sample;
+ int rf_stride = int(dec_data_dim.x);
+ int rf_off = int(rf_raw_dim.x) * rf_channel + time_sample;
/* NOTE: rf_data index and stride considering the data is i16 not i32 */
- uint ridx = rf_off / RF_SAMPLES_PER_INDEX;
- uint ridx_delta = rf_stride / RF_SAMPLES_PER_INDEX;
+ int ridx = rf_off / RF_SAMPLES_PER_INDEX;
+ int ridx_delta = rf_stride / RF_SAMPLES_PER_INDEX;
/* NOTE: rf_data is i16 so each access grabs two time samples at a time.
* We need to shift arithmetically (maintaining the sign) to get the
diff --git a/static.c b/static.c
@@ -305,7 +305,9 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory)
ctx->shared_memory = os_open_shared_memory_area(OS_SMEM_NAME, BEAMFORMER_SHARED_MEMORY_SIZE);
if (!ctx->shared_memory)
os_fatal(s8("Get more ram lol\n"));
- ctx->shared_memory->raw_data_sync = 1;
+ /* TODO(rnp): refactor - this is annoying */
+ ctx->shared_memory->raw_data_sync = 1;
+ ctx->shared_memory->channel_mapping_sync = 1;
/* NOTE: default compute shader pipeline */
ctx->shared_memory->compute_stages[0] = CS_DECODE;