ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git

ogl_beamformer_lib.c (22903B)


/* See LICENSE for license details. */
#include "../compiler.h"

#define BEAMFORMER_IMPORT static

#include "../beamformer.h"

#include "../util.h"

#include "../generated/beamformer.meta.c"
#include "../beamformer_parameters.h"
#include "ogl_beamformer_lib_base.h"

#if OS_LINUX
#include "../os_linux.c"
#elif OS_WINDOWS
#include "../os_win32.c"

W32(iptr) OpenFileMappingA(u32, b32, c8 *);

#else
#error Unsupported Platform
#endif

#include "../util_os.c"
#include "../beamformer_shared_memory.c"

global struct {
	BeamformerSharedMemory *bp;
	i32                     timeout_ms;
	BeamformerLibErrorKind  last_error;
	i64                     shared_memory_size;
} g_beamformer_library_context;

#if OS_LINUX

function s8
os_open_shared_memory_area(char *name)
{
	s8 result = {0};
	/* NOTE: shm_open() returns -1 on failure; 0 is a valid descriptor */
	i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR);
	if (fd != -1) {
		struct stat sb;
		if (fstat(fd, &sb) != -1) {
			void *new = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
			if (new != MAP_FAILED) {
				result.data = new;
				result.len  = sb.st_size;
			}
		}
		close(fd);
	}
	return result;
}

function void
os_close_shared_memory_area(void *memory, i64 size)
{
	munmap(memory, size);
}

#elif OS_WINDOWS

W32(u64) VirtualQuery(void *base_address, void *memory_basic_info, u64 memory_basic_info_size);
W32(b32) UnmapViewOfFile(void *);

function b32
os_reserve_region_locks(void)
{
	u8 buffer[1024];
	Stream sb = {.data = buffer, .cap = countof(buffer)};
	stream_append_s8(&sb, s8(OS_SHARED_MEMORY_NAME "_lock_"));

	i32 start_index    = sb.widx;
	u32 reserved_count = 0;
	for EachElement(os_w32_shared_memory_semaphores, it) {
		stream_reset(&sb, start_index);
		stream_append_u64(&sb, it);
		stream_append_byte(&sb, 0);
		os_w32_shared_memory_semaphores[it] = os_w32_create_semaphore((c8 *)sb.data, 1, 1);
		if InvalidHandle(os_w32_shared_memory_semaphores[it])
			break;
		reserved_count++;
	}

	b32 result = reserved_count == countof(os_w32_shared_memory_semaphores);
	if (!result) {
		for (u32 i = 0; i < reserved_count; i++)
			CloseHandle(os_w32_shared_memory_semaphores[i].value[0]);
	}

	return result;
}

function s8
os_open_shared_memory_area(char *name)
{
	struct alignas(16) {
		void *BaseAddress;
		void *AllocationBase;
		u32   AllocationProtect;
		u32   __alignment1;
		u64   RegionSize;
		u32   State;
		u32   Protect;
		u32   Type;
		u32   __alignment2;
	} memory_basic_info;

	s8 result = {0};
	iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name);
	if (h != INVALID_FILE) {
		// NOTE(rnp): a size of 0 maps the whole region, we can determine its size after
		void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, 0);
		if (new &&
		    VirtualQuery(new, &memory_basic_info, sizeof(memory_basic_info)) == sizeof(memory_basic_info) &&
		    os_reserve_region_locks())
		{
			result.data = new;
			result.len  = (i64)memory_basic_info.RegionSize;
		}

		if (new && !result.data)
			UnmapViewOfFile(new);

		CloseHandle(h);
	}
	return result;
}

function void
os_close_shared_memory_area(void *memory, i64 size)
{
	UnmapViewOfFile(memory);
}

#endif

#define lib_error_check(c, e) lib_error_check_(c, BeamformerLibErrorKind_##e)
function b32
lib_error_check_(b32 condition, BeamformerLibErrorKind error_kind)
{
	b32 result = condition;
	if (!result) g_beamformer_library_context.last_error = error_kind;
	assert(result);
	return result;
}

function b32
check_shared_memory(void)
{
	b32 result = g_beamformer_library_context.bp != 0;
	if unlikely(!g_beamformer_library_context.bp) {
		s8 shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME);
		if (lib_error_check(shared_memory.data != 0, SharedMemory)) {
			BeamformerSharedMemory *bp = (BeamformerSharedMemory *)shared_memory.data;
			result = lib_error_check(bp->version == BEAMFORMER_SHARED_MEMORY_VERSION, VersionMismatch);
			if (result) {
				g_beamformer_library_context.bp                 = bp;
				g_beamformer_library_context.shared_memory_size = shared_memory.len;
			} else {
				os_close_shared_memory_area(shared_memory.data, shared_memory.len);
			}
		}
	}

	if likely(g_beamformer_library_context.bp)
		result = lib_error_check(likely(!g_beamformer_library_context.bp->invalid), InvalidAccess);
	return result;
}

function b32
valid_parameter_block(u32 block)
{
	b32 result = check_shared_memory();
	if (result) {
		result = lib_error_check(block < g_beamformer_library_context.bp->reserved_parameter_blocks,
		                         ParameterBlockUnallocated);
	}
	return result;
}

function BeamformWork *
try_push_work_queue(void)
{
	BeamformWork *result = beamform_work_queue_push(&g_beamformer_library_context.bp->external_work_queue);
	lib_error_check(result != 0, WorkQueueFull);
	return result;
}

function b32
lib_try_lock(i32 lock, i32 timeout_ms)
{
	b32 result = beamformer_shared_memory_take_lock(g_beamformer_library_context.bp, lock, (u32)timeout_ms);
	lib_error_check(result, SyncVariable);
	return result;
}

function void
lib_release_lock(i32 lock)
{
	beamformer_shared_memory_release_lock(g_beamformer_library_context.bp, lock);
}

u32
beamformer_get_api_version(void)
{
	return BEAMFORMER_SHARED_MEMORY_VERSION;
}

const char *
beamformer_error_string(BeamformerLibErrorKind kind)
{
	#define X(type, num, string) string,
	local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"};
	#undef X
	return error_string_table[MIN(kind, countof(error_string_table) - 1)];
}

BeamformerLibErrorKind
beamformer_get_last_error(void)
{
	return g_beamformer_library_context.last_error;
}

const char *
beamformer_get_last_error_string(void)
{
	return beamformer_error_string(beamformer_get_last_error());
}
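
/* Example (illustrative sketch, not part of the library): a caller built
 * against these headers can sanity check that the library it loaded agrees
 * on the shared memory layout, and use the error string helpers whenever a
 * call fails; "params" is a hypothetical caller-filled struct:
 *
 *	if (beamformer_get_api_version() != BEAMFORMER_SHARED_MEMORY_VERSION)
 *		fprintf(stderr, "header/library version mismatch\n");
 *	BeamformerParameters params = {0};  // filled in by the caller
 *	if (!beamformer_push_parameters(&params))
 *		fprintf(stderr, "push failed: %s\n", beamformer_get_last_error_string());
 */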

void
beamformer_set_global_timeout(u32 timeout_ms)
{
	g_beamformer_library_context.timeout_ms = timeout_ms;
}

b32
beamformer_reserve_parameter_blocks(uint32_t count)
{
	b32 result = 0;
	if (check_shared_memory() &&
	    lib_error_check(count <= BeamformerMaxParameterBlockSlots, ParameterBlockOverflow))
	{
		g_beamformer_library_context.bp->reserved_parameter_blocks = count;
		result = 1;
	}
	return result;
}
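
/* Example (illustrative sketch): reserving two parameter blocks so that two
 * acquisition configurations can stay resident and be selected per frame;
 * counts above BeamformerMaxParameterBlockSlots fail with
 * ParameterBlockOverflow:
 *
 *	beamformer_set_global_timeout(100);  // applies to subsequent lock waits
 *	if (!beamformer_reserve_parameter_blocks(2))
 *		fprintf(stderr, "%s\n", beamformer_get_last_error_string());
 */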

function b32
validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
{
	b32 result = lib_error_check(shader_count <= BeamformerMaxComputeShaderStages, ComputeStageOverflow);
	if (result) {
		for (u32 i = 0; i < shader_count; i++)
			result &= BETWEEN(shaders[i], BeamformerShaderKind_ComputeFirst, BeamformerShaderKind_ComputeLast);
		if (!result) {
			g_beamformer_library_context.last_error = BeamformerLibErrorKind_InvalidComputeStage;
		} else if (shaders[0] != BeamformerShaderKind_Demodulate &&
		           shaders[0] != BeamformerShaderKind_Decode)
		{
			g_beamformer_library_context.last_error = BeamformerLibErrorKind_InvalidStartShader;
			result = 0;
		} else if (shaders[0] == BeamformerShaderKind_Demodulate &&
		           !(data_kind == BeamformerDataKind_Int16 || data_kind == BeamformerDataKind_Float32))
		{
			g_beamformer_library_context.last_error = BeamformerLibErrorKind_InvalidDemodulationDataKind;
			result = 0;
		}
	}
	return result;
}

function b32
validate_simple_parameters(BeamformerSimpleParameters *bp)
{
	b32 result = check_shared_memory();
	if (result) {
		result &= bp->channel_count <= BeamformerMaxChannelCount;
		if (!result)
			g_beamformer_library_context.last_error = BeamformerLibErrorKind_InvalidSimpleParameters;
	}
	return result;
}

function b32
parameter_block_region_upload(void *data, u32 size, u32 block, BeamformerParameterBlockRegions region_id,
                              u32 block_offset, i32 timeout_ms)
{
	i32 lock   = BeamformerSharedMemoryLockKind_Count + (i32)block;
	b32 result = valid_parameter_block(block) && lib_try_lock(lock, timeout_ms);
	if (result) {
		mem_copy((u8 *)beamformer_parameter_block(g_beamformer_library_context.bp, block) + block_offset,
		         data, size);
		mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block, region_id);
		lib_release_lock(lock);
	}
	return result;
}

b32
beamformer_set_pipeline_stage_parameters_at(u32 stage_index, i32 parameter, u32 block)
{
	u32 offset  = BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_ComputePipeline];
	offset     += offsetof(BeamformerComputePipeline, parameters);
	offset     += (stage_index % BeamformerMaxComputeShaderStages) * sizeof(BeamformerShaderParameters);
	b32 result  = parameter_block_region_upload(&parameter, sizeof(BeamformerShaderParameters), block,
	                                            BeamformerParameterBlockRegion_ComputePipeline, offset,
	                                            g_beamformer_library_context.timeout_ms);
	return result;
}

b32
beamformer_set_pipeline_stage_parameters(u32 stage_index, i32 parameter)
{
	b32 result = beamformer_set_pipeline_stage_parameters_at(stage_index, parameter, 0);
	return result;
}

b32
beamformer_push_pipeline_at(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind, u32 block)
{
	b32 result = 0;
	if (check_shared_memory() && validate_pipeline(shaders, shader_count, data_kind)) {
		i32 lock = BeamformerSharedMemoryLockKind_Count + (i32)block;
		if (valid_parameter_block(block) && lib_try_lock(lock, g_beamformer_library_context.timeout_ms)) {
			BeamformerParameterBlock *b = beamformer_parameter_block(g_beamformer_library_context.bp, block);
			mem_copy(&b->pipeline.shaders, shaders, shader_count * sizeof(*shaders));
			mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block,
			                                  BeamformerParameterBlockRegion_ComputePipeline);
			b->pipeline.shader_count = shader_count;
			b->pipeline.data_kind    = data_kind;
			lib_release_lock(lock);
			result = 1;
		}
	}
	return result;
}

b32
beamformer_push_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
{
	b32 result = beamformer_push_pipeline_at(shaders, shader_count, data_kind, 0);
	return result;
}
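
/* Example (illustrative sketch): pushing a minimal decode-only pipeline to
 * the default parameter block. validate_pipeline() above requires every
 * stage to lie between BeamformerShaderKind_ComputeFirst and
 * BeamformerShaderKind_ComputeLast, the first stage to be Decode or
 * Demodulate, and Demodulate input to be Int16 or Float32:
 *
 *	i32 stages[] = {BeamformerShaderKind_Decode};
 *	if (!beamformer_push_pipeline(stages, countof(stages), BeamformerDataKind_Int16))
 *		fprintf(stderr, "%s\n", beamformer_get_last_error_string());
 */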

function b32
beamformer_create_filter_base(BeamformerFilterParameters params, u8 filter_slot, u8 parameter_block)
{
	b32 result = 0;
	if (check_shared_memory()) {
		BeamformWork *work = try_push_work_queue();
		if (work) {
			BeamformerCreateFilterContext *ctx = &work->create_filter_context;
			work->kind = BeamformerWorkKind_CreateFilter;
			ctx->parameters      = params;
			ctx->filter_slot     = filter_slot     % BeamformerFilterSlots;
			ctx->parameter_block = parameter_block % BeamformerMaxParameterBlockSlots;
			beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
			result = 1;
		}
	}
	return result;
}

b32
beamformer_create_filter(BeamformerFilterKind kind, void *filter_parameters, u32 filter_size,
                         f32 sampling_frequency, b32 complex, u8 filter_slot, u8 parameter_block)
{
	b32 result = 0;
	if (lib_error_check(kind >= 0 && kind < BeamformerFilterKind_Count, InvalidFilterKind)) {
		BeamformerFilterParameters fp = {0};
		/* NOTE(rnp): any parameter struct works as base offset */
		filter_size = MIN(filter_size, sizeof(fp) - offsetof(BeamformerFilterParameters, kaiser));
		mem_copy(&fp.kaiser, filter_parameters, filter_size);
		fp.kind               = kind;
		fp.complex            = complex != 0;
		fp.sampling_frequency = sampling_frequency;
		result = beamformer_create_filter_base(fp, filter_slot, parameter_block);
	}
	return result;
}
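
/* Example (illustrative sketch): creating a filter in filter slot 0 for
 * parameter block 0. The concrete BeamformerFilterKind values and their
 * parameter structs are defined in the public header; "KaiserParams" below
 * is a hypothetical stand-in for whichever struct matches the chosen kind
 * (the parameters are copied starting at fp.kaiser regardless of kind):
 *
 *	KaiserParams kp = {0};  // hypothetical caller-side parameter struct
 *	beamformer_create_filter(kind, &kp, sizeof(kp), sampling_frequency,
 *	                         0, 0, 0);  // real-valued, slot 0, block 0
 */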

function void
beamformer_flush_commands(void)
{
	i32 lock = BeamformerSharedMemoryLockKind_DispatchCompute;
	beamformer_shared_memory_take_lock(g_beamformer_library_context.bp, lock, 0);
}

#define BEAMFORMER_UPLOAD_FNS \
	X(channel_mapping,               i16, 1, ChannelMapping) \
	X(focal_vectors,                 f32, 2, FocalVectors)   \
	X(sparse_elements,               i16, 1, SparseElements) \
	X(transmit_receive_orientations, u8,  1, TransmitReceiveOrientations)

#define X(name, dtype, elements, region_name) \
b32 beamformer_push_##name ##_at(dtype *data, u32 count, u32 block) { \
	b32 result = 0; \
	if (lib_error_check(count <= countof(((BeamformerParameterBlock *)0)->name), BufferOverflow)) { \
		result = parameter_block_region_upload(data, count * elements * sizeof(dtype), block, \
		                                       BeamformerParameterBlockRegion_##region_name,  \
		                                       offsetof(BeamformerParameterBlock, name),      \
		                                       g_beamformer_library_context.timeout_ms);      \
	} \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X

#define X(name, dtype, ...) \
b32 beamformer_push_##name (dtype *data, u32 count) { \
	b32 result = beamformer_push_##name ##_at(data, count, 0); \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X
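
/* Example (illustrative sketch): the X-macros above expand into
 * beamformer_push_channel_mapping(_at), beamformer_push_focal_vectors(_at),
 * beamformer_push_sparse_elements(_at), and
 * beamformer_push_transmit_receive_orientations(_at). An identity channel
 * mapping for a hypothetical 128 channel probe would be pushed as:
 *
 *	i16 map[128];
 *	for (i16 i = 0; i < 128; i++) map[i] = i;
 *	if (!beamformer_push_channel_mapping(map, 128))
 *		fprintf(stderr, "%s\n", beamformer_get_last_error_string());
 */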

function b32
beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, u32 block)
{
	b32 result = 0;
	Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
	                                                       g_beamformer_library_context.shared_memory_size);
	BeamformerParameterBlock *b  = beamformer_parameter_block(g_beamformer_library_context.bp, block);
	BeamformerParameters     *bp = &b->parameters;
	BeamformerDataKind data_kind = b->pipeline.data_kind;

	u32 size     = bp->acquisition_count * bp->sample_count * bp->channel_count * beamformer_data_kind_byte_size[data_kind];
	u32 raw_size = bp->raw_data_dimensions.x * bp->raw_data_dimensions.y * beamformer_data_kind_byte_size[data_kind];

	if (lib_error_check(size <= arena_capacity(&scratch, u8), BufferOverflow) &&
	    lib_error_check(size <= data_size && data_size == raw_size, DataSizeMismatch))
	{
		if (lib_try_lock(BeamformerSharedMemoryLockKind_UploadRF, timeout_ms)) {
			if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
				u32 channel_count      = bp->channel_count;
				u32 out_channel_stride = beamformer_data_kind_byte_size[data_kind] * bp->sample_count * bp->acquisition_count;
				u32 in_channel_stride  = beamformer_data_kind_byte_size[data_kind] * bp->raw_data_dimensions.x;

				for (u32 channel = 0; channel < channel_count; channel++) {
					u16 data_channel = (u16)b->channel_mapping[channel];
					u32 out_off = out_channel_stride * channel;
					u32 in_off  = in_channel_stride  * data_channel;
					/* TODO(rnp): it would be better to do a non-temporal copy here, but we can't ensure
					 * 64 byte boundaries. */
					mem_copy(scratch.beg + out_off, (u8 *)data + in_off, out_channel_stride);
				}

				lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
				/* TODO(rnp): need a better way to communicate this */
				u64 rf_block_rf_size = (u64)block << 32ULL | (u64)size;
				atomic_store_u64(&g_beamformer_library_context.bp->rf_block_rf_size, rf_block_rf_size);
				result = 1;
			}
		}
	}
	return result;
}

b32
beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, u32 parameter_slot)
{
	b32 result = 0;
	if (check_shared_memory()) {
		u32 reserved_blocks = g_beamformer_library_context.bp->reserved_parameter_blocks;
		if (lib_error_check(image_plane_tag < BeamformerViewPlaneTag_Count, InvalidImagePlane) &&
		    lib_error_check(parameter_slot < reserved_blocks, ParameterBlockUnallocated) &&
		    beamformer_push_data_base(data, data_size, g_beamformer_library_context.timeout_ms, parameter_slot))
		{
			BeamformWork *work = try_push_work_queue();
			if (work) {
				work->kind = BeamformerWorkKind_ComputeIndirect;
				work->compute_indirect_context.view_plane      = image_plane_tag;
				work->compute_indirect_context.parameter_block = parameter_slot;
				beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
				beamformer_flush_commands();
				result = 1;
			}
		}
	}
	return result;
}
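
/* Example (illustrative sketch): a live imaging loop pushes one raw data
 * frame per acquisition and lets the beamformer run the configured pipeline.
 * This assumes parameters, pipeline, and at least one reserved parameter
 * block are already in place; acquire_frame() and raw_size are hypothetical
 * stand-ins for the caller's data source:
 *
 *	for (;;) {
 *		void *frame = acquire_frame();
 *		if (!beamformer_push_data_with_compute(frame, raw_size, 0, 0))
 *			fprintf(stderr, "%s\n", beamformer_get_last_error_string());
 *	}
 */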

b32
beamformer_push_parameters_at(BeamformerParameters *bp, u32 block)
{
	b32 result = parameter_block_region_upload(bp, sizeof(*bp), block,
	                                           BeamformerParameterBlockRegion_Parameters,
	                                           offsetof(BeamformerParameterBlock, parameters),
	                                           g_beamformer_library_context.timeout_ms);
	return result;
}

b32
beamformer_push_parameters(BeamformerParameters *bp)
{
	b32 result = beamformer_push_parameters_at(bp, 0);
	return result;
}

b32
beamformer_push_simple_parameters_at(BeamformerSimpleParameters *bp, u32 block)
{
	b32 result = validate_simple_parameters(bp);
	if (result) {
		alignas(64) v2 focal_vectors[countof(bp->steering_angles)];
		for (u32 i = 0; i < countof(bp->steering_angles); i++)
			focal_vectors[i] = (v2){{bp->steering_angles[i], bp->focal_depths[i]}};

		result &= beamformer_push_parameters_at((BeamformerParameters *)bp, block);
		result &= beamformer_push_pipeline_at(bp->compute_stages, bp->compute_stages_count, (BeamformerDataKind)bp->data_kind, block);
		result &= beamformer_push_channel_mapping_at(bp->channel_mapping, bp->channel_count, block);
		result &= beamformer_push_focal_vectors_at((f32 *)focal_vectors, countof(focal_vectors), block);
		result &= beamformer_push_transmit_receive_orientations_at(bp->transmit_receive_orientations,
		                                                           bp->acquisition_count, block);

		if (bp->acquisition_kind == BeamformerAcquisitionKind_UFORCES ||
		    bp->acquisition_kind == BeamformerAcquisitionKind_UHERCULES)
		{
			result &= beamformer_push_sparse_elements_at(bp->sparse_elements, bp->acquisition_count, block);
		}

		for (u32 stage = 0; stage < bp->compute_stages_count; stage++)
			result &= beamformer_set_pipeline_stage_parameters_at(stage, bp->compute_stage_parameters[stage], block);
	}
	return result;
}

b32
beamformer_push_simple_parameters(BeamformerSimpleParameters *bp)
{
	b32 result = beamformer_push_simple_parameters_at(bp, 0);
	return result;
}

b32
beamformer_push_parameters_ui(BeamformerUIParameters *bp)
{
	b32 result = parameter_block_region_upload(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
	                                           offsetof(BeamformerParameterBlock, parameters_ui),
	                                           g_beamformer_library_context.timeout_ms);
	return result;
}

b32
beamformer_push_parameters_head(BeamformerParametersHead *bp)
{
	b32 result = parameter_block_region_upload(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
	                                           offsetof(BeamformerParameterBlock, parameters_head),
	                                           g_beamformer_library_context.timeout_ms);
	return result;
}

function b32
beamformer_export_buffer(BeamformerExportContext export_context)
{
	BeamformWork *work = try_push_work_queue();
	b32 result = work && lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, 0);
	if (result) {
		work->export_context = export_context;
		work->kind = BeamformerWorkKind_ExportBuffer;
		work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
		beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
	}
	return result;
}

function b32
beamformer_export(BeamformerExportContext export, void *out, i32 timeout_ms)
{
	b32 result = 0;
	if (beamformer_export_buffer(export)) {
		/* NOTE(rnp): if this fails it just means that the work from push_data hasn't
		 * started yet. This is here to catch the other case where the work started
		 * and finished before we finished queuing the export work item */
		beamformer_flush_commands();

		if (lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
			if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
				Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
				                                                       g_beamformer_library_context.shared_memory_size);
				mem_copy(out, scratch.beg, export.size);
				lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
				result = 1;
			}
			lib_release_lock(BeamformerSharedMemoryLockKind_ExportSync);
		}
	}
	return result;
}

b32
beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t data_size,
                         void *out_data, int32_t timeout_ms)
{
	b32 result = beamformer_push_simple_parameters(bp);
	if (result) {
		iv3 output_points = bp->output_points.xyz;
		output_points.E[0] = Max(1, output_points.E[0]);
		output_points.E[1] = Max(1, output_points.E[1]);
		output_points.E[2] = Max(1, output_points.E[2]);

		b32 complex = 0;
		for (u32 stage = 0; stage < bp->compute_stages_count; stage++) {
			BeamformerShaderKind shader = (BeamformerShaderKind)bp->compute_stages[stage];
			complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_CudaHilbert;
		}

		iz output_size = output_points.x * output_points.y * output_points.z * (i32)sizeof(f32);
		if (complex) output_size *= 2;

		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
		                                                       g_beamformer_library_context.shared_memory_size);
		if (out_data) result &= lib_error_check(output_size <= arena_capacity(&scratch, u8), ExportSpaceOverflow);

		if (result) {
			result = beamformer_push_data_with_compute(data, data_size, 0, 0);
			if (result && out_data) {
				BeamformerExportContext export;
				export.kind = BeamformerExportKind_BeamformedData;
				export.size = (u32)output_size;
				result = beamformer_export(export, out_data, timeout_ms);
			}
		}
	}
	return result;
}
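
/* Example (illustrative sketch): synchronous one-shot usage. With a non-null
 * out_data pointer beamformer_beamform_data() waits (up to timeout_ms) for
 * the beamformed volume to be exported into out_data. Buffer sizes are the
 * caller's responsibility; the names below are hypothetical:
 *
 *	BeamformerSimpleParameters bp = {0};  // filled in by the caller
 *	f32 *volume = malloc(volume_bytes);   // >= x*y*z*sizeof(f32), *2 if complex
 *	if (!beamformer_beamform_data(&bp, rf_data, rf_bytes, volume, 1000))
 *		fprintf(stderr, "%s\n", beamformer_get_last_error_string());
 */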

b32
beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
{
	b32 result = 0;
	if (check_shared_memory()) {
		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
		                                                       g_beamformer_library_context.shared_memory_size);
		if (lib_error_check((iz)sizeof(*output) <= arena_capacity(&scratch, u8), ExportSpaceOverflow)) {
			BeamformerExportContext export;
			export.kind = BeamformerExportKind_Stats;
			export.size = sizeof(*output);
			result = beamformer_export(export, output, timeout_ms);
		}
	}
	return result;
}
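
/* Example (illustrative sketch): polling per-stage compute timings after
 * frames have been processed; the table layout is defined in the public
 * header:
 *
 *	BeamformerComputeStatsTable stats;
 *	if (beamformer_compute_timings(&stats, 1000)) {
 *		// inspect stats
 *	}
 */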

i32
beamformer_live_parameters_get_dirty_flag(void)
{
	i32 result = -1;
	if (check_shared_memory()) {
		u32 flag = ctz_u32(g_beamformer_library_context.bp->live_imaging_dirty_flags);
		if (flag != 32) {
			atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1u << flag));
			result = (i32)flag;
		}
	}
	return result;
}

BeamformerLiveImagingParameters *
beamformer_get_live_parameters(void)
{
	BeamformerLiveImagingParameters *result = 0;
	if (check_shared_memory()) result = &g_beamformer_library_context.bp->live_imaging_parameters;
	return result;
}

b32
beamformer_set_live_parameters(BeamformerLiveImagingParameters *new)
{
	b32 result = 0;
	if (check_shared_memory()) {
		mem_copy(&g_beamformer_library_context.bp->live_imaging_parameters, new, sizeof(*new));
		store_fence();
		result = 1;
	}
	return result;
}
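
/* Example (illustrative sketch): consuming live imaging parameter changes.
 * beamformer_live_parameters_get_dirty_flag() clears and returns the lowest
 * set dirty bit, or -1 when nothing has changed since the last poll:
 *
 *	i32 flag;
 *	while ((flag = beamformer_live_parameters_get_dirty_flag()) != -1) {
 *		BeamformerLiveImagingParameters *live = beamformer_get_live_parameters();
 *		// react to the change indicated by flag
 *	}
 */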