ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

ogl_beamformer_lib.c (23513B)


      1 /* See LICENSE for license details. */
      2 #include "../compiler.h"
      3 
      4 #include "../util.h"
      5 #include "../generated/beamformer.meta.c"
      6 #include "../beamformer_parameters.h"
      7 #include "ogl_beamformer_lib_base.h"
      8 
      9 #if OS_LINUX
     10 #include "../os_linux.c"
     11 #elif OS_WINDOWS
     12 #include "../os_win32.c"
     13 
     14 W32(iptr) OpenFileMappingA(u32, b32, c8 *);
     15 
     16 #else
     17 #error Unsupported Platform
     18 #endif
     19 
     20 #include "../beamformer_shared_memory.c"
     21 
/* Library-wide state shared by every function in this file. */
global struct {
	/* mapping returned by os_open_shared_memory_area(); opened lazily by check_shared_memory() */
	SharedMemoryRegion      shared_memory;
	/* typed view of shared_memory.region; only assigned once the version check passes */
	BeamformerSharedMemory *bp;
	/* lock timeout used by the exported push functions; set via beamformer_set_global_timeout() */
	i32                     timeout_ms;
	/* most recent failure recorded by lib_error_check() and the validators */
	BeamformerLibErrorKind  last_error;
} g_beamformer_library_context;
     28 
     29 #if OS_LINUX
     30 
     31 function b32
     32 os_reserve_region_locks(iptr os_context, u32 count)
     33 {
     34 	b32 result = count <= BeamformerMaxParameterBlockSlots;
     35 	return result;
     36 }
     37 
     38 function SharedMemoryRegion
     39 os_open_shared_memory_area(char *name)
     40 {
     41 	SharedMemoryRegion result = {0};
     42 	i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR);
     43 	if (fd > 0) {
     44 		void *new = mmap(0, BEAMFORMER_SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
     45 		if (new != MAP_FAILED) result.region = new;
     46 		close(fd);
     47 	}
     48 	return result;
     49 }
     50 
     51 #elif OS_WINDOWS
     52 
/* Windows variant: grows or shrinks the set of named semaphores backing the
 * shared memory locks.
 *
 * `os_context` is a w32_shared_memory_context pointer; `count` is the number
 * of parameter block locks wanted. The fixed BeamformerSharedMemoryLockKind_Count
 * locks are always added on top of `count`.
 *
 * Returns 0 when `count` exceeds BeamformerMaxParameterBlockSlots or when a
 * semaphore could not be created, otherwise non-zero. */
function b32
os_reserve_region_locks(iptr os_context, u32 count)
{
	/* handle table has static storage and is what ctx->semaphores points at after a successful grow */
	local_persist iptr semaphores[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots];
	w32_shared_memory_context *ctx = (typeof(ctx))os_context;

	b32 result = count <= BeamformerMaxParameterBlockSlots;
	if (result) {
		count += BeamformerSharedMemoryLockKind_Count;
		if (count > ctx->reserved_count) {
			/* grow: semaphore names are OS_SHARED_MEMORY_NAME "_lock_" followed by the lock index */
			u8 buffer[1024];
			Stream sb = {.data = buffer, .cap = countof(buffer)};
			stream_append_s8(&sb, s8(OS_SHARED_MEMORY_NAME "_lock_"));

			u32 new_reserved_count;
			for (new_reserved_count = ctx->reserved_count;
			     new_reserved_count < count && result;
			     new_reserved_count++)
			{
				/* copy the prefix stream so each iteration appends its own index and terminator */
				Stream lb = sb;
				stream_append_u64(&lb, new_reserved_count);
				stream_append_byte(&lb, 0);
				semaphores[new_reserved_count] = CreateSemaphoreA(0, 1, 1, (c8 *)lb.data);
				/* NOTE(review): CreateSemaphoreA is documented to return NULL on failure;
				 * confirm INVALID_FILE matches that value */
				result &= semaphores[new_reserved_count] != INVALID_FILE;
			}

			if (result) {
				ctx->semaphores     = semaphores;
				ctx->reserved_count = count;
			} else {
				/* roll back the handles created by this call; the loop counter was already
				 * advanced past the failed entry, so that entry is passed to CloseHandle too */
				for (u32 j = ctx->reserved_count; j < new_reserved_count; j++)
					CloseHandle(semaphores[j]);
			}
		} else if (count < ctx->reserved_count) {
			/* shrink: close the surplus handles from the top down */
			for (u32 i = ctx->reserved_count; i > count;)
				CloseHandle(semaphores[--i]);
			ctx->reserved_count = count;
		}
	}
	return result;
}
     94 
     95 function SharedMemoryRegion
     96 os_open_shared_memory_area(char *name)
     97 {
     98 	local_persist w32_shared_memory_context ctx = {0};
     99 	SharedMemoryRegion result = {0};
    100 	iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name);
    101 	if (h != INVALID_FILE) {
    102 		void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, BEAMFORMER_SHARED_MEMORY_SIZE);
    103 		if (new && os_reserve_region_locks((iptr)&ctx, 1)) {
    104 			result.region     = new;
    105 			result.os_context = (iptr)&ctx;
    106 		}
    107 		CloseHandle(h);
    108 	}
    109 	return result;
    110 }
    111 
    112 #endif
    113 
    114 function b32
    115 lib_error_check(b32 condition, BeamformerLibErrorKind error_kind)
    116 {
    117 	b32 result = condition;
    118 	if (!result) g_beamformer_library_context.last_error = error_kind;
    119 	return result;
    120 }
    121 
    122 function b32
    123 check_shared_memory(void)
    124 {
    125 	if (!g_beamformer_library_context.shared_memory.region) {
    126 		g_beamformer_library_context.shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME);
    127 		if (lib_error_check(g_beamformer_library_context.shared_memory.region != 0, BF_LIB_ERR_KIND_SHARED_MEMORY)) {
    128 			u32 version = ((BeamformerSharedMemory *)g_beamformer_library_context.shared_memory.region)->version;
    129 			if (lib_error_check(version == BEAMFORMER_SHARED_MEMORY_VERSION, BF_LIB_ERR_KIND_VERSION_MISMATCH))
    130 				g_beamformer_library_context.bp = g_beamformer_library_context.shared_memory.region;
    131 		}
    132 	}
    133 
    134 	b32 result = 0;
    135 	if (g_beamformer_library_context.bp)
    136 		result = lib_error_check(!g_beamformer_library_context.bp->invalid, BF_LIB_ERR_KIND_INVALID_ACCESS);
    137 	return result;
    138 }
    139 
    140 function b32
    141 valid_parameter_block(u32 block)
    142 {
    143 	b32 result = check_shared_memory();
    144 	if (result) {
    145 		result = lib_error_check(block < g_beamformer_library_context.bp->reserved_parameter_blocks,
    146 		                         BF_LIB_ERR_KIND_PARAMETER_BLOCK_UNALLOCATED);
    147 	}
    148 	return result;
    149 }
    150 
    151 function BeamformWork *
    152 try_push_work_queue(void)
    153 {
    154 	BeamformWork *result = beamform_work_queue_push(&g_beamformer_library_context.bp->external_work_queue);
    155 	lib_error_check(result != 0, BF_LIB_ERR_KIND_WORK_QUEUE_FULL);
    156 	return result;
    157 }
    158 
    159 function b32
    160 lib_try_lock(i32 lock, i32 timeout_ms)
    161 {
    162 	b32 result = os_shared_memory_region_lock(&g_beamformer_library_context.shared_memory,
    163 	                                          g_beamformer_library_context.bp->locks,
    164 	                                          lock, (u32)timeout_ms);
    165 	lib_error_check(result, BF_LIB_ERR_KIND_SYNC_VARIABLE);
    166 	return result;
    167 }
    168 
    169 function void
    170 lib_release_lock(i32 lock)
    171 {
    172 	os_shared_memory_region_unlock(&g_beamformer_library_context.shared_memory,
    173 	                               g_beamformer_library_context.bp->locks, (i32)lock);
    174 }
    175 
    176 u32
    177 beamformer_get_api_version(void)
    178 {
    179 	return BEAMFORMER_SHARED_MEMORY_VERSION;
    180 }
    181 
/* Maps an error kind to a human readable string.
 *
 * The table is generated by expanding BEAMFORMER_LIB_ERRORS with X() reduced
 * to its string argument, followed by one extra "invalid error kind" entry.
 * The index is clamped to that last entry with MIN().
 * NOTE(review): indexing by `kind` presumably relies on the error kinds being
 * numbered consecutively from 0 - confirm against BEAMFORMER_LIB_ERRORS. */
const char *
beamformer_error_string(BeamformerLibErrorKind kind)
{
	#define X(type, num, string) string,
	local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"};
	#undef X
	return error_string_table[MIN(kind, countof(error_string_table) - 1)];
}
    190 
    191 BeamformerLibErrorKind
    192 beamformer_get_last_error(void)
    193 {
    194 	return g_beamformer_library_context.last_error;
    195 }
    196 
    197 const char *
    198 beamformer_get_last_error_string(void)
    199 {
    200 	return beamformer_error_string(beamformer_get_last_error());
    201 }
    202 
    203 b32
    204 beamformer_set_global_timeout(i32 timeout_ms)
    205 {
    206 	b32 result = lib_error_check(timeout_ms >= -1, BF_LIB_ERR_KIND_INVALID_TIMEOUT);
    207 	if (result) g_beamformer_library_context.timeout_ms = timeout_ms;
    208 	return result;
    209 }
    210 
    211 b32
    212 beamformer_reserve_parameter_blocks(uint32_t count)
    213 {
    214 	b32 result = 0;
    215 	if (check_shared_memory() &&
    216 	    lib_error_check(os_reserve_region_locks(g_beamformer_library_context.shared_memory.os_context, count),
    217 	                    BF_LIB_ERR_KIND_PARAMETER_BLOCK_OVERFLOW))
    218 	{
    219 		u32 old_count = g_beamformer_library_context.bp->reserved_parameter_blocks;
    220 		g_beamformer_library_context.bp->reserved_parameter_blocks = count;
    221 		for (u32 i = old_count; i < count; i++)
    222 			zero_struct(beamformer_parameter_block(g_beamformer_library_context.bp, i));
    223 		result = 1;
    224 	}
    225 	return result;
    226 }
    227 
    228 function b32
    229 validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
    230 {
    231 	b32 result = lib_error_check(shader_count <= BeamformerMaxComputeShaderStages, BF_LIB_ERR_KIND_COMPUTE_STAGE_OVERFLOW);
    232 	if (result) {
    233 		for (u32 i = 0; i < shader_count; i++)
    234 			result &= BETWEEN(shaders[i], BeamformerShaderKind_ComputeFirst, BeamformerShaderKind_ComputeLast);
    235 		if (!result) {
    236 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_COMPUTE_STAGE;
    237 		} else if (shaders[0] != BeamformerShaderKind_Demodulate &&
    238 		           shaders[0] != BeamformerShaderKind_Decode)
    239 		{
    240 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_START_SHADER;
    241 			result = 0;
    242 		} else if (shaders[0] == BeamformerShaderKind_Demodulate &&
    243 		           !(data_kind == BeamformerDataKind_Int16 || data_kind == BeamformerDataKind_Float32))
    244 		{
    245 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_DEMOD_DATA_KIND;
    246 			result = 0;
    247 		}
    248 	}
    249 	return result;
    250 }
    251 
    252 function b32
    253 validate_simple_parameters(BeamformerSimpleParameters *bp)
    254 {
    255 	b32 result = check_shared_memory();
    256 	if (result) {
    257 		result &= bp->channel_count <= BeamformerMaxChannelCount;
    258 		if (!result)
    259 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_SIMPLE_PARAMETERS;
    260 	}
    261 	return result;
    262 }
    263 
    264 function b32
    265 parameter_block_region_upload(void *data, u32 size, u32 block, BeamformerParameterBlockRegions region_id,
    266                               u32 block_offset, i32 timeout_ms)
    267 {
    268 	i32 lock   = BeamformerSharedMemoryLockKind_Count + (i32)block;
    269 	b32 result = valid_parameter_block(block) && lib_try_lock(lock, timeout_ms);
    270 	if (result) {
    271 		mem_copy((u8 *)beamformer_parameter_block(g_beamformer_library_context.bp, block) + block_offset,
    272 		         data, size);
    273 		mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block, region_id);
    274 		lib_release_lock(lock);
    275 	}
    276 	return result;
    277 }
    278 
    279 b32
    280 beamformer_set_pipeline_stage_parameters_at(u32 stage_index, i32 parameter, u32 block)
    281 {
    282 	u32 offset  = BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_ComputePipeline];
    283 	offset     += offsetof(BeamformerComputePipeline, parameters);
    284 	offset     += (stage_index % BeamformerMaxComputeShaderStages) * sizeof(BeamformerShaderParameters);
    285 	b32 result  = parameter_block_region_upload(&parameter, sizeof(BeamformerShaderParameters), block,
    286 	                                            BeamformerParameterBlockRegion_ComputePipeline, offset,
    287 	                                            g_beamformer_library_context.timeout_ms);
    288 	return result;
    289 }
    290 
    291 b32
    292 beamformer_set_pipeline_stage_parameters(u32 stage_index, i32 parameter)
    293 {
    294 	b32 result = beamformer_set_pipeline_stage_parameters_at(stage_index, parameter, 0);
    295 	return result;
    296 }
    297 
    298 b32
    299 beamformer_push_pipeline_at(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind, u32 block)
    300 {
    301 	b32 result = 0;
    302 	if (check_shared_memory() && validate_pipeline(shaders, shader_count, data_kind)) {
    303 		i32 lock = BeamformerSharedMemoryLockKind_Count + (i32)block;
    304 		if (valid_parameter_block(block) && lib_try_lock(lock, g_beamformer_library_context.timeout_ms)) {
    305 			BeamformerParameterBlock *b = beamformer_parameter_block(g_beamformer_library_context.bp, block);
    306 			mem_copy(&b->pipeline.shaders, shaders, shader_count * sizeof(*shaders));
    307 			mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block,
    308 			                                  BeamformerParameterBlockRegion_ComputePipeline);
    309 			b->pipeline.shader_count = shader_count;
    310 			b->pipeline.data_kind    = data_kind;
    311 			lib_release_lock(lock);
    312 			result = 1;
    313 		}
    314 	}
    315 	return result;
    316 }
    317 
    318 b32
    319 beamformer_push_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
    320 {
    321 	b32 result = beamformer_push_pipeline_at(shaders, shader_count, data_kind, 0);
    322 	return result;
    323 }
    324 
    325 function b32
    326 beamformer_create_filter_base(BeamformerFilterKind kind, BeamformerFilterParameters params, u8 filter_slot, u8 parameter_block)
    327 {
    328 	b32 result = 0;
    329 	if (check_shared_memory()) {
    330 		BeamformWork *work = try_push_work_queue();
    331 		if (work) {
    332 			BeamformerCreateFilterContext *ctx = &work->create_filter_context;
    333 			work->kind = BeamformerWorkKind_CreateFilter;
    334 			ctx->kind            = kind;
    335 			ctx->parameters      = params;
    336 			ctx->filter_slot     = filter_slot     % BeamformerFilterSlots;
    337 			ctx->parameter_block = parameter_block % BeamformerMaxParameterBlockSlots;
    338 			beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
    339 			result = 1;
    340 		}
    341 	}
    342 	return result;
    343 }
    344 
    345 b32
    346 beamformer_create_filter(BeamformerFilterKind kind, f32 *filter_parameters, u32 filter_parameter_count,
    347                          f32 sampling_frequency, b32 complex, u8 filter_slot, u8 parameter_block)
    348 {
    349 	b32 result = 0;
    350 	if (lib_error_check(kind >= 0 && kind < BeamformerFilterKind_Count, BF_LIB_ERR_KIND_INVALID_FILTER_KIND)) {
    351 		BeamformerFilterParameters fp = {.sampling_frequency = sampling_frequency, .complex = complex != 0};
    352 		#define X(kind, ...) sizeof(fp.kind),
    353 		read_only local_persist u32 kind_sizes[] = {BEAMFORMER_FILTER_KIND_LIST(,)};
    354 		#undef X
    355 		if (lib_error_check(kind_sizes[kind] == sizeof(f32) * filter_parameter_count,
    356 		                    BF_LIB_ERR_KIND_INVALID_FILTER_PARAM_COUNT))
    357 		{
    358 			/* NOTE(rnp): any filter kind struct works as base offset of union */
    359 			mem_copy(&fp.Kaiser, filter_parameters, kind_sizes[kind]);
    360 			result = beamformer_create_filter_base(kind, fp, filter_slot, parameter_block);
    361 		}
    362 	}
    363 	return result;
    364 }
    365 
    366 function b32
    367 beamformer_flush_commands(i32 timeout_ms)
    368 {
    369 	b32 result = lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, timeout_ms);
    370 	return result;
    371 }
    372 
/* List of array uploads generated below. Columns:
 * name of the BeamformerParameterBlock member, element data type, number of
 * `dtype` values per array entry, BeamformerParameterBlockRegion_ suffix. */
#define BEAMFORMER_UPLOAD_FNS \
	X(channel_mapping,               i16, 1, ChannelMapping) \
	X(focal_vectors,                 f32, 2, FocalVectors)   \
	X(sparse_elements,               i16, 1, SparseElements) \
	X(transmit_receive_orientations, u8,  1, TransmitReceiveOrientations)

/* Generates beamformer_push_<name>_at(data, count, block) for every list entry.
 * `count` is checked against the entry count of the block's member array
 * (BF_LIB_ERR_KIND_BUFFER_OVERFLOW on failure) and then
 * count * elements * sizeof(dtype) bytes are uploaded with the global timeout. */
#define X(name, dtype, elements, region_name) \
b32 beamformer_push_##name ##_at(dtype *data, u32 count, u32 block) { \
	b32 result = 0; \
	if (lib_error_check(count <= countof(((BeamformerParameterBlock *)0)->name), BF_LIB_ERR_KIND_BUFFER_OVERFLOW)) { \
		result = parameter_block_region_upload(data, count * elements * sizeof(dtype), block, \
		                                       BeamformerParameterBlockRegion_##region_name,  \
		                                       offsetof(BeamformerParameterBlock, name),      \
		                                       g_beamformer_library_context.timeout_ms);      \
	} \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X
    392 
/* Generates beamformer_push_<name>(data, count) for every BEAMFORMER_UPLOAD_FNS
 * entry; each forwards to the matching _at function with parameter block 0. */
#define X(name, dtype, ...) \
b32 beamformer_push_##name (dtype *data, u32 count) { \
	b32 result = beamformer_push_##name ##_at(data, count, 0); \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X
    400 
/* Copies raw channel data into the shared memory scratch arena, reordering
 * channels through the `channel_mapping` of parameter block `block`, and then
 * publishes the copied size in `scratch_rf_size`.
 *
 * data:       caller's raw data
 * data_size:  size of `data` in bytes
 * timeout_ms: timeout for the UploadRF lock
 * block:      parameter block whose parameters/pipeline describe the data
 *
 * Returns 1 when the data was copied. Fails with BF_LIB_ERR_KIND_BUFFER_OVERFLOW
 * when the expected size exceeds the scratch arena, with
 * BF_LIB_ERR_KIND_DATA_SIZE_MISMATCH when `data_size` does not agree with the
 * block's dimensions, or with a lock error.
 *
 * NOTE(review): the shared memory pointer and `block` are used without any
 * check in this function - callers are presumably expected to have validated
 * both; confirm. */
function b32
beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, u32 block)
{
	b32 result = 0;
	Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
	BeamformerParameterBlock *b  = beamformer_parameter_block(g_beamformer_library_context.bp, block);
	BeamformerParameters     *bp = &b->parameters;
	BeamformerDataKind data_kind = b->pipeline.data_kind;

	/* size: bytes written to scratch; raw_size: bytes the caller's buffer must hold.
	 * NOTE(review): both products are evaluated in u32 and can wrap for large dimensions */
	u32 size     = bp->acquisition_count * bp->sample_count * bp->channel_count * beamformer_data_kind_byte_size[data_kind];
	u32 raw_size = bp->raw_data_dimensions[0] * bp->raw_data_dimensions[1] * beamformer_data_kind_byte_size[data_kind];

	if (lib_error_check(size <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_BUFFER_OVERFLOW) &&
	    lib_error_check(size <= data_size && data_size == raw_size, BF_LIB_ERR_KIND_DATA_SIZE_MISMATCH))
	{
		/* NOTE(review): UploadRF is not released anywhere in this function, neither after a
		 * successful copy nor when the ScratchSpace lock below cannot be taken - presumably
		 * the consumer of the data releases it; confirm the failure path */
		if (lib_try_lock(BeamformerSharedMemoryLockKind_UploadRF, timeout_ms)) {
			if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
				/* strides are counted in elements of the underlying scalar type */
				u32 channel_count      = bp->channel_count;
				u32 out_channel_stride = beamformer_data_kind_element_count[data_kind] * bp->sample_count * bp->acquisition_count;
				u32 in_channel_stride  = beamformer_data_kind_element_count[data_kind] * bp->raw_data_dimensions[0];

				for (u32 channel = 0; channel < channel_count; channel++) {
					/* output channel `channel` is read from input channel channel_mapping[channel].
					 * NOTE(review): in_off is not checked against data_size */
					u16 data_channel = (u16)b->channel_mapping[channel];
					u32 out_off = out_channel_stride * channel;
					u32 in_off  = in_channel_stride  * data_channel;
					for (u32 sample = 0; sample < out_channel_stride; sample++, out_off++, in_off++) {
						switch (data_kind) {
						case BeamformerDataKind_Int16:
						case BeamformerDataKind_Int16Complex:
						{
							((i16 *)scratch.beg)[out_off] = ((i16 *)data)[in_off];
						}break;
						case BeamformerDataKind_Float32:
						case BeamformerDataKind_Float32Complex:
						{
							((f32 *)scratch.beg)[out_off] = ((f32 *)data)[in_off];
						}break;
						InvalidDefaultCase;
						}
					}
				}

				lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
				/* TODO(rnp): need a better way to communicate this */
				atomic_store_u32(&g_beamformer_library_context.bp->scratch_rf_size, size);
				result = 1;
			}
		}
	}
	return result;
}
    452 
    453 b32
    454 beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, u32 parameter_slot)
    455 {
    456 	b32 result = 0;
    457 	if (check_shared_memory()) {
    458 		u32 reserved_blocks = g_beamformer_library_context.bp->reserved_parameter_blocks;
    459 		if (lib_error_check(image_plane_tag < BeamformerViewPlaneTag_Count, BF_LIB_ERR_KIND_INVALID_IMAGE_PLANE) &&
    460 		    lib_error_check(parameter_slot < reserved_blocks, BF_LIB_ERR_KIND_PARAMETER_BLOCK_UNALLOCATED) &&
    461 		    beamformer_push_data_base(data, data_size, g_beamformer_library_context.timeout_ms, parameter_slot))
    462 		{
    463 			BeamformWork *work = try_push_work_queue();
    464 			if (work) {
    465 				work->kind = BeamformerWorkKind_ComputeIndirect;
    466 				work->compute_indirect_context.view_plane      = image_plane_tag;
    467 				work->compute_indirect_context.parameter_block = parameter_slot;
    468 				beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
    469 				beamformer_flush_commands(0);
    470 				result = 1;
    471 			}
    472 		}
    473 	}
    474 	return result;
    475 }
    476 
    477 b32
    478 beamformer_push_parameters_at(BeamformerParameters *bp, u32 block)
    479 {
    480 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), block,
    481 	                                           BeamformerParameterBlockRegion_Parameters,
    482 	                                           offsetof(BeamformerParameterBlock, parameters),
    483 	                                           g_beamformer_library_context.timeout_ms);
    484 	return result;
    485 }
    486 
    487 b32
    488 beamformer_push_parameters(BeamformerParameters *bp)
    489 {
    490 	b32 result = beamformer_push_parameters_at(bp, 0);
    491 	return result;
    492 }
    493 
    494 b32
    495 beamformer_push_simple_parameters_at(BeamformerSimpleParameters *bp, u32 block)
    496 {
    497 	b32 result = validate_simple_parameters(bp);
    498 	if (result) {
    499 		alignas(64) v2 focal_vectors[countof(bp->steering_angles)];
    500 		for (u32 i = 0; i < countof(bp->steering_angles); i++)
    501 			focal_vectors[i] = (v2){{bp->steering_angles[i], bp->focal_depths[i]}};
    502 
    503 		result &= beamformer_push_parameters_at((BeamformerParameters *)bp, block);
    504 		result &= beamformer_push_pipeline_at(bp->compute_stages, bp->compute_stages_count, (BeamformerDataKind)bp->data_kind, block);
    505 		result &= beamformer_push_channel_mapping_at(bp->channel_mapping, bp->channel_count, block);
    506 		result &= beamformer_push_focal_vectors_at((f32 *)focal_vectors, countof(focal_vectors), block);
    507 		result &= beamformer_push_transmit_receive_orientations_at(bp->transmit_receive_orientations,
    508 		                                                           bp->acquisition_count, block);
    509 
    510 		if (bp->das_shader_id == BeamformerAcquisitionKind_UFORCES || bp->das_shader_id == BeamformerAcquisitionKind_UHERCULES)
    511 			result &= beamformer_push_sparse_elements_at(bp->sparse_elements, bp->acquisition_count, block);
    512 
    513 		for (u32 stage = 0; stage < bp->compute_stages_count; stage++)
    514 			result &= beamformer_set_pipeline_stage_parameters_at(stage, bp->compute_stage_parameters[stage], block);
    515 	}
    516 	return result;
    517 }
    518 
    519 b32
    520 beamformer_push_simple_parameters(BeamformerSimpleParameters *bp)
    521 {
    522 	b32 result = beamformer_push_simple_parameters_at(bp, 0);
    523 	return result;
    524 }
    525 
    526 b32
    527 beamformer_push_parameters_ui(BeamformerUIParameters *bp)
    528 {
    529 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
    530 	                                           offsetof(BeamformerParameterBlock, parameters_ui),
    531 	                                           g_beamformer_library_context.timeout_ms);
    532 	return result;
    533 }
    534 
    535 b32
    536 beamformer_push_parameters_head(BeamformerParametersHead *bp)
    537 {
    538 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
    539 	                                           offsetof(BeamformerParameterBlock, parameters_head),
    540 	                                           g_beamformer_library_context.timeout_ms);
    541 	return result;
    542 }
    543 
    544 function b32
    545 beamformer_export_buffer(BeamformerExportContext export_context)
    546 {
    547 	BeamformWork *work = try_push_work_queue();
    548 	b32 result = work && lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, 0);
    549 	if (result) {
    550 		work->export_context = export_context;
    551 		work->kind = BeamformerWorkKind_ExportBuffer;
    552 		work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
    553 		beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
    554 	}
    555 	return result;
    556 }
    557 
    558 function b32
    559 beamformer_export(BeamformerExportContext export, void *out, i32 timeout_ms)
    560 {
    561 	b32 result = 0;
    562 	if (beamformer_export_buffer(export)) {
    563 		/* NOTE(rnp): if this fails it just means that the work from push_data hasn't
    564 		 * started yet. This is here to catch the other case where the work started
    565 		 * and finished before we finished queuing the export work item */
    566 		beamformer_flush_commands(0);
    567 
    568 		if (lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
    569 			if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
    570 				Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
    571 				mem_copy(out, scratch.beg, export.size);
    572 				lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
    573 				result = 1;
    574 			}
    575 			lib_release_lock(BeamformerSharedMemoryLockKind_ExportSync);
    576 		}
    577 	}
    578 	return result;
    579 }
    580 
    581 b32
    582 beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t data_size,
    583                          void *out_data, int32_t timeout_ms)
    584 {
    585 	b32 result = validate_simple_parameters(bp);
    586 	if (result) {
    587 		bp->output_points[0] = MAX(1, bp->output_points[0]);
    588 		bp->output_points[1] = MAX(1, bp->output_points[1]);
    589 		bp->output_points[2] = MAX(1, bp->output_points[2]);
    590 
    591 		beamformer_push_simple_parameters(bp);
    592 
    593 		b32 complex = 0;
    594 		for (u32 stage = 0; stage < bp->compute_stages_count; stage++) {
    595 			BeamformerShaderKind shader = (BeamformerShaderKind)bp->compute_stages[stage];
    596 			complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_CudaHilbert;
    597 		}
    598 
    599 		iz output_size = bp->output_points[0] * bp->output_points[1] * bp->output_points[2] * (i32)sizeof(f32);
    600 		if (complex) output_size *= 2;
    601 
    602 		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
    603 		if (result && lib_error_check(output_size <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW)
    604 		    && beamformer_push_data_with_compute(data, data_size, 0, 0))
    605 		{
    606 			BeamformerExportContext export;
    607 			export.kind = BeamformerExportKind_BeamformedData;
    608 			export.size = (u32)output_size;
    609 			result = beamformer_export(export, out_data, timeout_ms);
    610 		}
    611 	}
    612 	return result;
    613 }
    614 
    615 b32
    616 beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
    617 {
    618 	static_assert(sizeof(*output) <= BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE,
    619 	              "timing table size exceeds scratch space");
    620 
    621 	b32 result = 0;
    622 	if (check_shared_memory()) {
    623 		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
    624 		if (lib_error_check((iz)sizeof(*output) <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW)) {
    625 			BeamformerExportContext export;
    626 			export.kind = BeamformerExportKind_Stats;
    627 			export.size = sizeof(*output);
    628 			result = beamformer_export(export, output, timeout_ms);
    629 		}
    630 	}
    631 	return result;
    632 }
    633 
    634 i32
    635 beamformer_live_parameters_get_dirty_flag(void)
    636 {
    637 	i32 result = -1;
    638 	if (check_shared_memory()) {
    639 		u32 flag = ctz_u32(g_beamformer_library_context.bp->live_imaging_dirty_flags);
    640 		if (flag != 32) {
    641 			atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1u << flag));
    642 			result = (i32)flag;
    643 		}
    644 	}
    645 	return result;
    646 }
    647 
    648 BeamformerLiveImagingParameters *
    649 beamformer_get_live_parameters(void)
    650 {
    651 	BeamformerLiveImagingParameters *result = 0;
    652 	if (check_shared_memory()) result = &g_beamformer_library_context.bp->live_imaging_parameters;
    653 	return result;
    654 }
    655 
    656 b32
    657 beamformer_set_live_parameters(BeamformerLiveImagingParameters *new)
    658 {
    659 	b32 result = 0;
    660 	if (check_shared_memory()) {
    661 		mem_copy(&g_beamformer_library_context.bp->live_imaging_parameters, new, sizeof(*new));
    662 		memory_write_barrier();
    663 		result = 1;
    664 	}
    665 	return result;
    666 }