ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

ogl_beamformer_lib.c (22206B)


      1 /* See LICENSE for license details. */
      2 #include "../compiler.h"
      3 
      4 #define BEAMFORMER_IMPORT static
      5 
      6 #include "../beamformer.h"
      7 
      8 #include "../util.h"
      9 
     10 #include "../generated/beamformer.meta.c"
     11 #include "../beamformer_parameters.h"
     12 #include "ogl_beamformer_lib_base.h"
     13 
     14 #if OS_LINUX
     15 #include "../os_linux.c"
     16 #elif OS_WINDOWS
     17 #include "../os_win32.c"
     18 
     19 W32(iptr) OpenFileMappingA(u32, b32, c8 *);
     20 
     21 #else
     22 #error Unsupported Platform
     23 #endif
     24 
     25 #include "../util_os.c"
     26 #include "../beamformer_shared_memory.c"
     27 
     28 global struct {
     29 	BeamformerSharedMemory *bp;
     30 	i32                     timeout_ms;
     31 	BeamformerLibErrorKind  last_error;
     32 	i64                     shared_memory_size;
     33 } g_beamformer_library_context;
     34 
     35 #if OS_LINUX
     36 
     37 function s8
     38 os_open_shared_memory_area(char *name)
     39 {
     40 	s8 result = {0};
     41 	i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR);
     42 	if (fd > 0) {
     43 		struct stat sb;
     44 		if (fstat(fd, &sb) != -1) {
     45 			void *new = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
     46 			if (new != MAP_FAILED) {
     47 				result.data = new;
     48 				result.len  = sb.st_size;
     49 			}
     50 		}
     51 		close(fd);
     52 	}
     53 	return result;
     54 }
     55 
     56 function void
     57 os_close_shared_memory_area(void *memory, i64 size)
     58 {
     59 	munmap(memory, size);
     60 }
     61 
     62 #elif OS_WINDOWS
     63 
     64 W32(u64) VirtualQuery(void *base_address, void *memory_basic_info, u64 memory_basic_info_size);
     65 W32(b32) UnmapViewOfFile(void *);
     66 
     67 function b32
     68 os_reserve_region_locks(void)
     69 {
     70 	u8 buffer[1024];
     71 	Stream sb = {.data = buffer, .cap = countof(buffer)};
     72 	stream_append_s8(&sb, s8(OS_SHARED_MEMORY_NAME "_lock_"));
     73 
     74 	i32 start_index    = sb.widx;
     75 	u32 reserved_count = 0;
     76 	for EachElement(os_w32_shared_memory_semaphores, it) {
     77 		stream_reset(&sb, start_index);
     78 		stream_append_u64(&sb, it);
     79 		stream_append_byte(&sb, 0);
     80 		os_w32_shared_memory_semaphores[it] = os_w32_create_semaphore((c8 *)sb.data, 1, 1);
     81 		if InvalidHandle(os_w32_shared_memory_semaphores[it])
     82 			break;
     83 		reserved_count++;
     84 	}
     85 
     86 	b32 result = reserved_count == countof(os_w32_shared_memory_semaphores);
     87 	if (!result) {
     88 		for (u32 i = 0; i < reserved_count; i++)
     89 			CloseHandle(os_w32_shared_memory_semaphores[i].value[0]);
     90 	}
     91 
     92 	return result;
     93 }
     94 
     95 function s8
     96 os_open_shared_memory_area(char *name)
     97 {
     98 	struct alignas(16) {
     99 		void *BaseAddress;
    100 		void *AllocationBase;
    101 		u32   AllocationProtect;
    102 		u32   __alignment1;
    103 		u64   RegionSize;
    104 		u32   State;
    105 		u32   Protect;
    106 		u32   Type;
    107 		u32   __alignment2;
    108 	} memory_basic_info;
    109 
    110 	s8 result = {0};
    111 	iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name);
    112 	if (h != INVALID_FILE) {
    113 		// NOTE(rnp): a size of 0 maps the whole region, we can determine its size after
    114 		void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, 0);
    115 		if (new &&
    116 		    VirtualQuery(new, &memory_basic_info, sizeof(memory_basic_info)) == sizeof(memory_basic_info) &&
    117 		    os_reserve_region_locks())
    118 		{
    119 			result.data = new;
    120 			result.len  = (i64)memory_basic_info.RegionSize;
    121 		}
    122 
    123 		if (new && !result.data)
    124 			UnmapViewOfFile(new);
    125 
    126 		CloseHandle(h);
    127 	}
    128 	return result;
    129 }
    130 
    131 function void
    132 os_close_shared_memory_area(void *memory, i64 size)
    133 {
    134 	UnmapViewOfFile(memory);
    135 }
    136 
    137 #endif
    138 
    139 #define lib_error_check(c, e) lib_error_check_(c, BeamformerLibErrorKind_##e)
    140 function b32
    141 lib_error_check_(b32 condition, BeamformerLibErrorKind error_kind)
    142 {
    143 	b32 result = condition;
    144 	if (!result) g_beamformer_library_context.last_error = error_kind;
    145 	assert(result);
    146 	return result;
    147 }
    148 
    149 function b32
    150 check_shared_memory(void)
    151 {
    152 	b32 result = g_beamformer_library_context.bp != 0;
    153 	if unlikely(!g_beamformer_library_context.bp) {
    154 		s8 shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME);
    155 		if (lib_error_check(shared_memory.data != 0, SharedMemory)) {
    156 			BeamformerSharedMemory *bp = (BeamformerSharedMemory *)shared_memory.data;
    157 			result = lib_error_check(bp->version == BEAMFORMER_SHARED_MEMORY_VERSION, VersionMismatch);
    158 			if (result) {
    159 				g_beamformer_library_context.bp                 = bp;
    160 				g_beamformer_library_context.shared_memory_size = shared_memory.len;
    161 			} else {
    162 				os_close_shared_memory_area(shared_memory.data, shared_memory.len);
    163 			}
    164 		}
    165 	}
    166 
    167 	if likely(g_beamformer_library_context.bp)
    168 		result = lib_error_check(likely(!g_beamformer_library_context.bp->invalid), InvalidAccess);
    169 	return result;
    170 }
    171 
    172 function b32
    173 valid_parameter_block(u32 block)
    174 {
    175 	b32 result = check_shared_memory();
    176 	if (result) {
    177 		result = lib_error_check(block < g_beamformer_library_context.bp->reserved_parameter_blocks,
    178 		                         ParameterBlockUnallocated);
    179 	}
    180 	return result;
    181 }
    182 
    183 function BeamformWork *
    184 try_push_work_queue(void)
    185 {
    186 	BeamformWork *result = beamform_work_queue_push(&g_beamformer_library_context.bp->external_work_queue);
    187 	lib_error_check(result != 0, WorkQueueFull);
    188 	return result;
    189 }
    190 
    191 function b32
    192 lib_try_lock(i32 lock, i32 timeout_ms)
    193 {
    194 	b32 result = beamformer_shared_memory_take_lock(g_beamformer_library_context.bp, lock, (u32)timeout_ms);
    195 	lib_error_check(result, SyncVariable);
    196 	return result;
    197 }
    198 
    199 function void
    200 lib_release_lock(i32 lock)
    201 {
    202 	beamformer_shared_memory_release_lock(g_beamformer_library_context.bp, lock);
    203 }
    204 
    205 u32
    206 beamformer_get_api_version(void)
    207 {
    208 	return BEAMFORMER_SHARED_MEMORY_VERSION;
    209 }
    210 
    211 const char *
    212 beamformer_error_string(BeamformerLibErrorKind kind)
    213 {
    214 	#define X(type, num, string) string,
    215 	local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"};
    216 	#undef X
    217 	return error_string_table[MIN(kind, countof(error_string_table) - 1)];
    218 }
    219 
    220 BeamformerLibErrorKind
    221 beamformer_get_last_error(void)
    222 {
    223 	return g_beamformer_library_context.last_error;
    224 }
    225 
    226 const char *
    227 beamformer_get_last_error_string(void)
    228 {
    229 	return beamformer_error_string(beamformer_get_last_error());
    230 }
    231 
    232 void
    233 beamformer_set_global_timeout(u32 timeout_ms)
    234 {
    235 	g_beamformer_library_context.timeout_ms = timeout_ms;
    236 }
    237 
    238 b32
    239 beamformer_reserve_parameter_blocks(uint32_t count)
    240 {
    241 	b32 result = 0;
    242 	if (check_shared_memory() &&
    243 	    lib_error_check(count <= BeamformerMaxParameterBlockSlots, ParameterBlockOverflow))
    244 	{
    245 		g_beamformer_library_context.bp->reserved_parameter_blocks = count;
    246 		result = 1;
    247 	}
    248 	return result;
    249 }
    250 
    251 function b32
    252 validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
    253 {
    254 	b32 data_kind_test = Between(data_kind, 0, BeamformerDataKind_Count - 1);
    255 	                     //data_kind != BeamformerDataKind_Float16 &&
    256 	                     //data_kind != BeamformerDataKind_Float16Complex;
    257 	if (!lib_error_check(data_kind_test, InvalidDataKind))
    258 		return 0;
    259 
    260 	if (!lib_error_check(shader_count <= BeamformerMaxComputeShaderStages, ComputeStageOverflow))
    261 		return 0;
    262 
    263 	for (u32 i = 0; i < shader_count; i++) {
    264 		b32 stage_test = Between(shaders[i], BeamformerShaderKind_ComputeFirst, BeamformerShaderKind_ComputeLast);
    265 		if (!lib_error_check(stage_test, InvalidComputeStage))
    266 			return 0;
    267 
    268 		if (shaders[i] == BeamformerShaderKind_Demodulate &&
    269 		    !lib_error_check(!beamformer_data_kind_complex[data_kind], InvalidDemodulationDataKind))
    270 		{
    271 			return 0;
    272 		}
    273 	}
    274 
    275 	b32 start_stage_test = shaders[0] == BeamformerShaderKind_Demodulate ||
    276 	                       shaders[0] == BeamformerShaderKind_Decode;
    277 	if (!lib_error_check(start_stage_test, InvalidStartShader))
    278 		return 0;
    279 
    280 	return 1;
    281 }
    282 
    283 function b32
    284 parameter_block_region_upload(void *data, u32 size, u32 block, BeamformerParameterBlockRegions region_id,
    285                               u32 block_offset, i32 timeout_ms)
    286 {
    287 	i32 lock   = BeamformerSharedMemoryLockKind_Count + (i32)block;
    288 	b32 result = valid_parameter_block(block) && lib_try_lock(lock, timeout_ms);
    289 	if (result) {
    290 		mem_copy((u8 *)beamformer_parameter_block(g_beamformer_library_context.bp, block) + block_offset,
    291 		         data, size);
    292 		mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block, region_id);
    293 		lib_release_lock(lock);
    294 	}
    295 	return result;
    296 }
    297 
    298 b32
    299 beamformer_set_pipeline_stage_parameters_at(u32 stage_index, i32 parameter, u32 block)
    300 {
    301 	u32 offset  = BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_ComputePipeline];
    302 	offset     += offsetof(BeamformerComputePipeline, parameters);
    303 	offset     += (stage_index % BeamformerMaxComputeShaderStages) * sizeof(BeamformerShaderParameters);
    304 	b32 result  = parameter_block_region_upload(&parameter, sizeof(BeamformerShaderParameters), block,
    305 	                                            BeamformerParameterBlockRegion_ComputePipeline, offset,
    306 	                                            g_beamformer_library_context.timeout_ms);
    307 	return result;
    308 }
    309 
    310 b32
    311 beamformer_set_pipeline_stage_parameters(u32 stage_index, i32 parameter)
    312 {
    313 	b32 result = beamformer_set_pipeline_stage_parameters_at(stage_index, parameter, 0);
    314 	return result;
    315 }
    316 
    317 b32
    318 beamformer_push_pipeline_at(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind, u32 block)
    319 {
    320 	b32 result = 0;
    321 	if (check_shared_memory() && validate_pipeline(shaders, shader_count, data_kind)) {
    322 		i32 lock = BeamformerSharedMemoryLockKind_Count + (i32)block;
    323 		if (valid_parameter_block(block) && lib_try_lock(lock, g_beamformer_library_context.timeout_ms)) {
    324 			BeamformerParameterBlock *b = beamformer_parameter_block(g_beamformer_library_context.bp, block);
    325 			mem_copy(&b->pipeline.shaders, shaders, shader_count * sizeof(*shaders));
    326 			mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block,
    327 			                                  BeamformerParameterBlockRegion_ComputePipeline);
    328 			b->pipeline.shader_count = shader_count;
    329 			b->pipeline.data_kind    = data_kind;
    330 			lib_release_lock(lock);
    331 			result = 1;
    332 		}
    333 	}
    334 	return result;
    335 }
    336 
    337 b32
    338 beamformer_push_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
    339 {
    340 	b32 result = beamformer_push_pipeline_at(shaders, shader_count, data_kind, 0);
    341 	return result;
    342 }
    343 
    344 function b32
    345 beamformer_create_filter_base(BeamformerFilterParameters params, u8 filter_slot, u8 parameter_block)
    346 {
    347 	b32 result = 0;
    348 	if (check_shared_memory()) {
    349 		BeamformWork *work = try_push_work_queue();
    350 		if (work) {
    351 			BeamformerCreateFilterContext *ctx = &work->create_filter_context;
    352 			work->kind = BeamformerWorkKind_CreateFilter;
    353 			ctx->parameters      = params;
    354 			ctx->filter_slot     = filter_slot     % BeamformerFilterSlots;
    355 			ctx->parameter_block = parameter_block % BeamformerMaxParameterBlockSlots;
    356 			beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
    357 			result = 1;
    358 		}
    359 	}
    360 	return result;
    361 }
    362 
    363 b32
    364 beamformer_create_filter(BeamformerFilterKind kind, void *filter_parameters, u32 filter_size,
    365                          f32 sampling_frequency, b32 complex, u8 filter_slot, u8 parameter_block)
    366 {
    367 	b32 result = 0;
    368 	if (lib_error_check(kind >= 0 && kind < BeamformerFilterKind_Count, InvalidFilterKind)) {
    369 		BeamformerFilterParameters fp = {0};
    370 		/* NOTE(rnp): any parameter struct works as base offset */
    371 		filter_size = MIN(filter_size, sizeof(fp) - offsetof(BeamformerFilterParameters, kaiser));
    372 		mem_copy(&fp.kaiser, filter_parameters, filter_size);
    373 		fp.kind               = kind;
    374 		fp.complex            = complex != 0;
    375 		fp.sampling_frequency = sampling_frequency;
    376 		result = beamformer_create_filter_base(fp, filter_slot, parameter_block);
    377 	}
    378 	return result;
    379 }
    380 
    381 function void
    382 beamformer_flush_commands(void)
    383 {
    384 	i32 lock = BeamformerSharedMemoryLockKind_DispatchCompute;
    385 	beamformer_shared_memory_take_lock(g_beamformer_library_context.bp, lock, 0);
    386 }
    387 
    388 #define BEAMFORMER_UPLOAD_FNS \
    389 	X(channel_mapping,               i16, 1, ChannelMapping) \
    390 	X(focal_vectors,                 f32, 2, FocalVectors)   \
    391 	X(sparse_elements,               i16, 1, SparseElements) \
    392 	X(transmit_receive_orientations, u8,  1, TransmitReceiveOrientations)
    393 
    394 #define X(name, dtype, elements, region_name) \
    395 b32 beamformer_push_##name ##_at(dtype *data, u32 count, u32 block) { \
    396 	b32 result = 0; \
    397 	if (lib_error_check(count <= countof(((BeamformerParameterBlock *)0)->name), BufferOverflow)) { \
    398 		result = parameter_block_region_upload(data, count * elements * sizeof(dtype), block, \
    399 		                                       BeamformerParameterBlockRegion_##region_name,  \
    400 		                                       offsetof(BeamformerParameterBlock, name),      \
    401 		                                       g_beamformer_library_context.timeout_ms);      \
    402 	} \
    403 	return result; \
    404 }
    405 BEAMFORMER_UPLOAD_FNS
    406 #undef X
    407 
    408 #define X(name, dtype, ...) \
    409 b32 beamformer_push_##name (dtype *data, u32 count) { \
    410 	b32 result = beamformer_push_##name ##_at(data, count, 0); \
    411 	return result; \
    412 }
    413 BEAMFORMER_UPLOAD_FNS
    414 #undef X
    415 
    416 function b32
    417 beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, u32 block)
    418 {
    419 	b32 result = 0;
    420 	Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
    421 	                                                       g_beamformer_library_context.shared_memory_size);
    422 	BeamformerParameterBlock *b  = beamformer_parameter_block(g_beamformer_library_context.bp, block);
    423 	BeamformerParameters     *bp = &b->parameters;
    424 	BeamformerDataKind data_kind = b->pipeline.data_kind;
    425 
    426 	u32 size     = bp->acquisition_count * bp->sample_count * bp->channel_count * beamformer_data_kind_byte_size[data_kind];
    427 	u32 raw_size = bp->raw_data_dimensions.x * bp->raw_data_dimensions.y * beamformer_data_kind_byte_size[data_kind];
    428 
    429 	if (lib_error_check(size <= arena_capacity(&scratch, u8), BufferOverflow) &&
    430 	    lib_error_check(size <= data_size && data_size == raw_size, DataSizeMismatch))
    431 	{
    432 		if (lib_try_lock(BeamformerSharedMemoryLockKind_UploadRF, timeout_ms)) {
    433 			if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
    434 				u32 channel_count      = bp->channel_count;
    435 				u32 out_channel_stride = beamformer_data_kind_byte_size[data_kind] * bp->sample_count * bp->acquisition_count;
    436 				u32 in_channel_stride  = beamformer_data_kind_byte_size[data_kind] * bp->raw_data_dimensions.x;
    437 
    438 				for (u32 channel = 0; channel < channel_count; channel++) {
    439 					u16 data_channel = (u16)b->channel_mapping[channel];
    440 					u32 out_off = out_channel_stride * channel;
    441 					u32 in_off  = in_channel_stride  * data_channel;
    442 					/* TODO(rnp): it would be better to do non temporal copy here, but we can't ensure
    443 					 * 64 byte boundaries. */
    444 					mem_copy(scratch.beg + out_off, (u8 *)data + in_off, out_channel_stride);
    445 				}
    446 
    447 				lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
    448 				/* TODO(rnp): need a better way to communicate this */
    449 				u64 rf_block_rf_size = (u64)block << 32ULL | (u64)size;
    450 				atomic_store_u64(&g_beamformer_library_context.bp->rf_block_rf_size, rf_block_rf_size);
    451 				result = 1;
    452 			}
    453 		}
    454 	}
    455 	return result;
    456 }
    457 
    458 b32
    459 beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, u32 parameter_slot)
    460 {
    461 	b32 result = 0;
    462 	if (check_shared_memory()) {
    463 		u32 reserved_blocks = g_beamformer_library_context.bp->reserved_parameter_blocks;
    464 		if (lib_error_check(image_plane_tag < BeamformerViewPlaneTag_Count, InvalidImagePlane) &&
    465 		    lib_error_check(parameter_slot < reserved_blocks, ParameterBlockUnallocated) &&
    466 		    beamformer_push_data_base(data, data_size, g_beamformer_library_context.timeout_ms, parameter_slot))
    467 		{
    468 			BeamformWork *work = try_push_work_queue();
    469 			if (work) {
    470 				work->kind = BeamformerWorkKind_ComputeIndirect;
    471 				work->compute_indirect_context.view_plane      = image_plane_tag;
    472 				work->compute_indirect_context.parameter_block = parameter_slot;
    473 				beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
    474 				beamformer_flush_commands();
    475 				result = 1;
    476 			}
    477 		}
    478 	}
    479 	return result;
    480 }
    481 
    482 b32
    483 beamformer_push_parameters_at(BeamformerParameters *bp, u32 block)
    484 {
    485 	b32 result = check_shared_memory();
    486 	if (result) {
    487 		result = parameter_block_region_upload(bp, sizeof(*bp), block,
    488 		                                       BeamformerParameterBlockRegion_Parameters,
    489 		                                       offsetof(BeamformerParameterBlock, parameters),
    490 		                                       g_beamformer_library_context.timeout_ms);
    491 		if (result) {
    492 			BeamformerParameterBlock *pb = beamformer_parameter_block(g_beamformer_library_context.bp, block);
    493 			atomic_or_u32(&pb->region_update_flags, 1u << BeamformerParameterRegionFlag_NotifyUI);
    494 		}
    495 	}
    496 	return result;
    497 }
    498 
    499 b32
    500 beamformer_push_parameters(BeamformerParameters *bp)
    501 {
    502 	b32 result = beamformer_push_parameters_at(bp, 0);
    503 	return result;
    504 }
    505 
    506 b32
    507 beamformer_push_simple_parameters_at(BeamformerSimpleParameters *bp, u32 block)
    508 {
    509 	b32 result = check_shared_memory();
    510 	if (result) {
    511 		alignas(64) v2 focal_vectors[countof(bp->steering_angles)];
    512 		for (u32 i = 0; i < countof(bp->steering_angles); i++)
    513 			focal_vectors[i] = (v2){{bp->steering_angles[i], bp->focal_depths[i]}};
    514 
    515 		result &= beamformer_push_parameters_at((BeamformerParameters *)bp, block);
    516 		result &= beamformer_push_pipeline_at(bp->compute_stages, bp->compute_stages_count, (BeamformerDataKind)bp->data_kind, block);
    517 		result &= beamformer_push_channel_mapping_at(bp->channel_mapping, bp->channel_count, block);
    518 		result &= beamformer_push_focal_vectors_at((f32 *)focal_vectors, countof(focal_vectors), block);
    519 		result &= beamformer_push_transmit_receive_orientations_at(bp->transmit_receive_orientations,
    520 		                                                           bp->acquisition_count, block);
    521 
    522 		if (bp->acquisition_kind == BeamformerAcquisitionKind_UFORCES ||
    523 		    bp->acquisition_kind == BeamformerAcquisitionKind_UHERCULES)
    524 		{
    525 			result &= beamformer_push_sparse_elements_at(bp->sparse_elements, bp->acquisition_count, block);
    526 		}
    527 
    528 		for (u32 stage = 0; stage < bp->compute_stages_count; stage++)
    529 			result &= beamformer_set_pipeline_stage_parameters_at(stage, bp->compute_stage_parameters[stage], block);
    530 	}
    531 	return result;
    532 }
    533 
    534 b32
    535 beamformer_push_simple_parameters(BeamformerSimpleParameters *bp)
    536 {
    537 	b32 result = beamformer_push_simple_parameters_at(bp, 0);
    538 	return result;
    539 }
    540 
    541 function b32
    542 beamformer_export_buffer(BeamformerExportContext export_context)
    543 {
    544 	BeamformWork *work = try_push_work_queue();
    545 	b32 result = work && lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, 0);
    546 	if (result) {
    547 		work->export_context = export_context;
    548 		work->kind = BeamformerWorkKind_ExportBuffer;
    549 		work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
    550 		beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
    551 	}
    552 	return result;
    553 }
    554 
    555 function b32
    556 beamformer_export(BeamformerExportContext export, void *out, i32 timeout_ms)
    557 {
    558 	b32 result = 0;
    559 	if (beamformer_export_buffer(export)) {
    560 		/* NOTE(rnp): if this fails it just means that the work from push_data hasn't
    561 		 * started yet. This is here to catch the other case where the work started
    562 		 * and finished before we finished queuing the export work item */
    563 		beamformer_flush_commands();
    564 
    565 		if (lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
    566 			if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
    567 				Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
    568 				                                                       g_beamformer_library_context.shared_memory_size);
    569 				mem_copy(out, scratch.beg, export.size);
    570 				lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
    571 				result = 1;
    572 			}
    573 			lib_release_lock(BeamformerSharedMemoryLockKind_ExportSync);
    574 		}
    575 	}
    576 	return result;
    577 }
    578 
    579 b32
    580 beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t data_size,
    581                          void *out_data, int32_t timeout_ms)
    582 {
    583 	b32 result = beamformer_push_simple_parameters(bp);
    584 	if (result) {
    585 		iv3 output_points = bp->output_points.xyz;
    586 		output_points.E[0] = Max(1, output_points.E[0]);
    587 		output_points.E[1] = Max(1, output_points.E[1]);
    588 		output_points.E[2] = Max(1, output_points.E[2]);
    589 
    590 		b32 complex = 0;
    591 		for (u32 stage = 0; stage < bp->compute_stages_count; stage++) {
    592 			BeamformerShaderKind shader = (BeamformerShaderKind)bp->compute_stages[stage];
    593 			complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_CudaHilbert;
    594 		}
    595 
    596 		iz output_size = output_points.x * output_points.y * output_points.z * (i32)sizeof(f32);
    597 		if (complex) output_size *= 2;
    598 
    599 		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
    600 		                                                       g_beamformer_library_context.shared_memory_size);
    601 		if (out_data) result &= lib_error_check(output_size <= arena_capacity(&scratch, u8), ExportSpaceOverflow);
    602 
    603 		if (result) {
    604 			result = beamformer_push_data_with_compute(data, data_size, 0, 0);
    605 			if (result && out_data) {
    606 				BeamformerExportContext export;
    607 				export.kind = BeamformerExportKind_BeamformedData;
    608 				export.size = (u32)output_size;
    609 				result = beamformer_export(export, out_data, timeout_ms);
    610 			}
    611 		}
    612 	}
    613 	return result;
    614 }
    615 
    616 b32
    617 beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
    618 {
    619 	b32 result = 0;
    620 	if (check_shared_memory()) {
    621 		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
    622 		                                                       g_beamformer_library_context.shared_memory_size);
    623 		if (lib_error_check((iz)sizeof(*output) <= arena_capacity(&scratch, u8), ExportSpaceOverflow)) {
    624 			BeamformerExportContext export;
    625 			export.kind = BeamformerExportKind_Stats;
    626 			export.size = sizeof(*output);
    627 			result = beamformer_export(export, output, timeout_ms);
    628 		}
    629 	}
    630 	return result;
    631 }
    632 
    633 i32
    634 beamformer_live_parameters_get_dirty_flag(void)
    635 {
    636 	i32 result = -1;
    637 	if (check_shared_memory()) {
    638 		u32 flag = ctz_u32(g_beamformer_library_context.bp->live_imaging_dirty_flags);
    639 		if (flag != 32) {
    640 			atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1u << flag));
    641 			result = (i32)flag;
    642 		}
    643 	}
    644 	return result;
    645 }
    646 
    647 BeamformerLiveImagingParameters *
    648 beamformer_get_live_parameters(void)
    649 {
    650 	BeamformerLiveImagingParameters *result = 0;
    651 	if (check_shared_memory()) result = &g_beamformer_library_context.bp->live_imaging_parameters;
    652 	return result;
    653 }
    654 
    655 b32
    656 beamformer_set_live_parameters(BeamformerLiveImagingParameters *new)
    657 {
    658 	b32 result = 0;
    659 	if (check_shared_memory()) {
    660 		mem_copy(&g_beamformer_library_context.bp->live_imaging_parameters, new, sizeof(*new));
    661 		store_fence();
    662 		result = 1;
    663 	}
    664 	return result;
    665 }