ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

ogl_beamformer_lib.c (22506B)


      1 /* See LICENSE for license details. */
      2 #include "../compiler.h"
      3 
      4 #include "../util.h"
      5 #include "../generated/beamformer.meta.c"
      6 #include "../beamformer_parameters.h"
      7 #include "ogl_beamformer_lib_base.h"
      8 
      9 #if OS_LINUX
     10 #include "../os_linux.c"
     11 #elif OS_WINDOWS
     12 #include "../os_win32.c"
     13 
     14 W32(iptr) OpenFileMappingA(u32, b32, c8 *);
     15 
     16 #else
     17 #error Unsupported Platform
     18 #endif
     19 
     20 #include "../beamformer_shared_memory.c"
     21 
/* NOTE: process-wide library state; the shared memory mapping is opened
 * lazily on first use (see check_shared_memory()) */
global struct {
	SharedMemoryRegion      shared_memory; /* OS mapping of the beamformer's shared memory */
	BeamformerSharedMemory *bp;            /* typed view of shared_memory.region, set only after version check */
	i32                     timeout_ms;    /* default lock timeout; any value >= -1 (see beamformer_set_global_timeout) */
	BeamformerLibErrorKind  last_error;    /* most recent failure recorded by lib_error_check(); never cleared on success */
} g_beamformer_library_context;
     28 
     29 #if OS_LINUX
     30 
     31 function b32
     32 os_reserve_region_locks(iptr os_context, u32 count)
     33 {
     34 	b32 result = count <= BeamformerMaxParameterBlockSlots;
     35 	return result;
     36 }
     37 
     38 function SharedMemoryRegion
     39 os_open_shared_memory_area(char *name)
     40 {
     41 	SharedMemoryRegion result = {0};
     42 	i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR);
     43 	if (fd > 0) {
     44 		void *new = mmap(0, BEAMFORMER_SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
     45 		if (new != MAP_FAILED) result.region = new;
     46 		close(fd);
     47 	}
     48 	return result;
     49 }
     50 
     51 #elif OS_WINDOWS
     52 
function b32
os_reserve_region_locks(iptr os_context, u32 count)
{
	/* Grows or shrinks the set of named Win32 semaphores backing the shared
	 * memory locks. The first BeamformerSharedMemoryLockKind_Count entries
	 * are the fixed locks; the remainder are per-parameter-block locks. */
	local_persist iptr semaphores[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots];
	w32_shared_memory_context *ctx = (typeof(ctx))os_context;

	b32 result = count <= BeamformerMaxParameterBlockSlots;
	if (result) {
		count += BeamformerSharedMemoryLockKind_Count;
		if (count > ctx->reserved_count) {
			/* build the name prefix once; each iteration appends its index */
			u8 buffer[1024];
			Stream sb = {.data = buffer, .cap = countof(buffer)};
			stream_append_s8(&sb, s8(OS_SHARED_MEMORY_NAME "_lock_"));

			u32 new_reserved_count;
			for (new_reserved_count = ctx->reserved_count;
			     new_reserved_count < count && result;
			     new_reserved_count++)
			{
				Stream lb = sb;
				stream_append_u64(&lb, new_reserved_count);
				stream_append_byte(&lb, 0); /* NUL terminate for the c8* API */
				semaphores[new_reserved_count] = CreateSemaphoreA(0, 1, 1, (c8 *)lb.data);
				/* NOTE(review): assumes INVALID_FILE matches CreateSemaphoreA's
				 * failure value (NULL per Win32 docs) - confirm in os_win32.c */
				result &= semaphores[new_reserved_count] != INVALID_FILE;
			}

			if (result) {
				ctx->semaphores     = semaphores;
				ctx->reserved_count = count;
			} else {
				/* unwind semaphores created this call; new_reserved_count was
				 * incremented past the failed slot so it is included here */
				for (u32 j = ctx->reserved_count; j < new_reserved_count; j++)
					CloseHandle(semaphores[j]);
			}
		} else if (count < ctx->reserved_count) {
			/* shrinking: release the highest-indexed semaphores first */
			for (u32 i = ctx->reserved_count; i > count;)
				CloseHandle(semaphores[--i]);
			ctx->reserved_count = count;
		}
	}
	return result;
}
     94 
function SharedMemoryRegion
os_open_shared_memory_area(char *name)
{
	/* Opens the beamformer's file mapping and installs the lock context.
	 * ctx must persist for the life of the process since the returned
	 * region keeps a pointer to it. */
	local_persist w32_shared_memory_context ctx = {0};
	SharedMemoryRegion result = {0};
	iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name);
	if (h != INVALID_FILE) {
		void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, BEAMFORMER_SHARED_MEMORY_SIZE);
		/* count == 1: reserve the fixed locks plus one parameter block */
		if (new && os_reserve_region_locks((iptr)&ctx, 1)) {
			result.region     = new;
			result.os_context = (iptr)&ctx;
		}
		/* NOTE(review): if lock reservation fails the mapped view is never
		 * unmapped - confirm whether that leak is intentional */
		CloseHandle(h); /* the view stays valid after the handle closes */
	}
	return result;
}
    111 
    112 #endif
    113 
    114 function b32
    115 lib_error_check(b32 condition, BeamformerLibErrorKind error_kind)
    116 {
    117 	b32 result = condition;
    118 	if (!result) g_beamformer_library_context.last_error = error_kind;
    119 	return result;
    120 }
    121 
function b32
check_shared_memory(void)
{
	/* Lazily opens the shared memory on first call; the typed pointer bp is
	 * cached only after the version matches this library's header. */
	if (!g_beamformer_library_context.shared_memory.region) {
		g_beamformer_library_context.shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME);
		if (lib_error_check(g_beamformer_library_context.shared_memory.region != 0, BF_LIB_ERR_KIND_SHARED_MEMORY)) {
			u32 version = ((BeamformerSharedMemory *)g_beamformer_library_context.shared_memory.region)->version;
			if (lib_error_check(version == BEAMFORMER_SHARED_MEMORY_VERSION, BF_LIB_ERR_KIND_VERSION_MISMATCH))
				g_beamformer_library_context.bp = g_beamformer_library_context.shared_memory.region;
		}
	}

	/* bp is non-zero only for a validated mapping; additionally reject
	 * regions the beamformer has since marked invalid */
	b32 result = 0;
	if (g_beamformer_library_context.bp)
		result = lib_error_check(!g_beamformer_library_context.bp->invalid, BF_LIB_ERR_KIND_INVALID_ACCESS);
	return result;
}
    139 
    140 function b32
    141 valid_parameter_block(u32 block)
    142 {
    143 	b32 result = check_shared_memory();
    144 	if (result) {
    145 		result = lib_error_check(block < g_beamformer_library_context.bp->reserved_parameter_blocks,
    146 		                         BF_LIB_ERR_KIND_PARAMETER_BLOCK_UNALLOCATED);
    147 	}
    148 	return result;
    149 }
    150 
    151 function BeamformWork *
    152 try_push_work_queue(void)
    153 {
    154 	BeamformWork *result = beamform_work_queue_push(&g_beamformer_library_context.bp->external_work_queue);
    155 	lib_error_check(result != 0, BF_LIB_ERR_KIND_WORK_QUEUE_FULL);
    156 	return result;
    157 }
    158 
    159 function b32
    160 lib_try_lock(i32 lock, i32 timeout_ms)
    161 {
    162 	b32 result = os_shared_memory_region_lock(&g_beamformer_library_context.shared_memory,
    163 	                                          g_beamformer_library_context.bp->locks,
    164 	                                          lock, (u32)timeout_ms);
    165 	lib_error_check(result, BF_LIB_ERR_KIND_SYNC_VARIABLE);
    166 	return result;
    167 }
    168 
    169 function void
    170 lib_release_lock(i32 lock)
    171 {
    172 	os_shared_memory_region_unlock(&g_beamformer_library_context.shared_memory,
    173 	                               g_beamformer_library_context.bp->locks, (i32)lock);
    174 }
    175 
    176 u32
    177 beamformer_get_api_version(void)
    178 {
    179 	return BEAMFORMER_SHARED_MEMORY_VERSION;
    180 }
    181 
    182 const char *
    183 beamformer_error_string(BeamformerLibErrorKind kind)
    184 {
    185 	#define X(type, num, string) string,
    186 	local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"};
    187 	#undef X
    188 	return error_string_table[MIN(kind, countof(error_string_table) - 1)];
    189 }
    190 
    191 BeamformerLibErrorKind
    192 beamformer_get_last_error(void)
    193 {
    194 	return g_beamformer_library_context.last_error;
    195 }
    196 
    197 const char *
    198 beamformer_get_last_error_string(void)
    199 {
    200 	return beamformer_error_string(beamformer_get_last_error());
    201 }
    202 
    203 b32
    204 beamformer_set_global_timeout(i32 timeout_ms)
    205 {
    206 	b32 result = lib_error_check(timeout_ms >= -1, BF_LIB_ERR_KIND_INVALID_TIMEOUT);
    207 	if (result) g_beamformer_library_context.timeout_ms = timeout_ms;
    208 	return result;
    209 }
    210 
b32
beamformer_reserve_parameter_blocks(uint32_t count)
{
	/* Reserves count parameter block slots: first grows/shrinks the OS lock
	 * set, then publishes the new count and zeroes any newly visible blocks.
	 * Returns 0 (with last_error set) when count exceeds the compiled limit
	 * or the shared memory is unavailable. */
	b32 result = 0;
	if (check_shared_memory() &&
	    lib_error_check(os_reserve_region_locks(g_beamformer_library_context.shared_memory.os_context, count),
	                    BF_LIB_ERR_KIND_PARAMETER_BLOCK_OVERFLOW))
	{
		u32 old_count = g_beamformer_library_context.bp->reserved_parameter_blocks;
		g_beamformer_library_context.bp->reserved_parameter_blocks = count;
		/* NOTE: only blocks beyond the previous reservation are cleared;
		 * shrinking leaves existing block contents untouched */
		for (u32 i = old_count; i < count; i++)
			zero_struct(beamformer_parameter_block(g_beamformer_library_context.bp, i));
		result = 1;
	}
	return result;
}
    227 
    228 function b32
    229 validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
    230 {
    231 	b32 result = lib_error_check(shader_count <= BeamformerMaxComputeShaderStages, BF_LIB_ERR_KIND_COMPUTE_STAGE_OVERFLOW);
    232 	if (result) {
    233 		for (u32 i = 0; i < shader_count; i++)
    234 			result &= BETWEEN(shaders[i], BeamformerShaderKind_ComputeFirst, BeamformerShaderKind_ComputeLast);
    235 		if (!result) {
    236 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_COMPUTE_STAGE;
    237 		} else if (shaders[0] != BeamformerShaderKind_Demodulate &&
    238 		           shaders[0] != BeamformerShaderKind_Decode)
    239 		{
    240 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_START_SHADER;
    241 			result = 0;
    242 		} else if (shaders[0] == BeamformerShaderKind_Demodulate &&
    243 		           !(data_kind == BeamformerDataKind_Int16 || data_kind == BeamformerDataKind_Float32))
    244 		{
    245 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_DEMOD_DATA_KIND;
    246 			result = 0;
    247 		}
    248 	}
    249 	return result;
    250 }
    251 
    252 function b32
    253 validate_simple_parameters(BeamformerSimpleParameters *bp)
    254 {
    255 	b32 result = check_shared_memory();
    256 	if (result) {
    257 		result &= bp->channel_count <= BeamformerMaxChannelCount;
    258 		if (!result)
    259 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_SIMPLE_PARAMETERS;
    260 	}
    261 	return result;
    262 }
    263 
function b32
parameter_block_region_upload(void *data, u32 size, u32 block, BeamformerParameterBlockRegions region_id,
                              u32 block_offset, i32 timeout_ms)
{
	/* Copies size bytes into block at block_offset while holding that
	 * block's lock, then marks region_id dirty so the beamformer reloads it.
	 * Per-block locks are indexed after the fixed shared memory locks. */
	i32 lock   = BeamformerSharedMemoryLockKind_Count + (i32)block;
	/* short-circuit: the lock is only attempted for a valid, reserved block */
	b32 result = valid_parameter_block(block) && lib_try_lock(lock, timeout_ms);
	if (result) {
		mem_copy((u8 *)beamformer_parameter_block(g_beamformer_library_context.bp, block) + block_offset,
		         data, size);
		mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block, region_id);
		lib_release_lock(lock);
	}
	return result;
}
    278 
    279 b32
    280 beamformer_set_pipeline_stage_parameters_at(u32 stage_index, i32 parameter, u32 block)
    281 {
    282 	u32 offset  = BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_ComputePipeline];
    283 	offset     += offsetof(BeamformerComputePipeline, parameters);
    284 	offset     += (stage_index % BeamformerMaxComputeShaderStages) * sizeof(BeamformerShaderParameters);
    285 	b32 result  = parameter_block_region_upload(&parameter, sizeof(BeamformerShaderParameters), block,
    286 	                                            BeamformerParameterBlockRegion_ComputePipeline, offset,
    287 	                                            g_beamformer_library_context.timeout_ms);
    288 	return result;
    289 }
    290 
    291 b32
    292 beamformer_set_pipeline_stage_parameters(u32 stage_index, i32 parameter)
    293 {
    294 	b32 result = beamformer_set_pipeline_stage_parameters_at(stage_index, parameter, 0);
    295 	return result;
    296 }
    297 
b32
beamformer_push_pipeline_at(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind, u32 block)
{
	/* Validates and then writes a complete compute pipeline (stages, stage
	 * count and data kind) into the given parameter block under its lock. */
	b32 result = 0;
	if (check_shared_memory() && validate_pipeline(shaders, shader_count, data_kind)) {
		/* per-block locks follow the fixed locks, matching
		 * parameter_block_region_upload() */
		i32 lock = BeamformerSharedMemoryLockKind_Count + (i32)block;
		if (valid_parameter_block(block) && lib_try_lock(lock, g_beamformer_library_context.timeout_ms)) {
			BeamformerParameterBlock *b = beamformer_parameter_block(g_beamformer_library_context.bp, block);
			mem_copy(&b->pipeline.shaders, shaders, shader_count * sizeof(*shaders));
			mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block,
			                                  BeamformerParameterBlockRegion_ComputePipeline);
			b->pipeline.shader_count = shader_count;
			b->pipeline.data_kind    = data_kind;
			lib_release_lock(lock);
			result = 1;
		}
	}
	return result;
}
    317 
    318 b32
    319 beamformer_push_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
    320 {
    321 	b32 result = beamformer_push_pipeline_at(shaders, shader_count, data_kind, 0);
    322 	return result;
    323 }
    324 
    325 function b32
    326 beamformer_create_filter_base(BeamformerFilterKind kind, BeamformerFilterParameters params, u8 filter_slot, u8 parameter_block)
    327 {
    328 	b32 result = 0;
    329 	if (check_shared_memory()) {
    330 		BeamformWork *work = try_push_work_queue();
    331 		if (work) {
    332 			BeamformerCreateFilterContext *ctx = &work->create_filter_context;
    333 			work->kind = BeamformerWorkKind_CreateFilter;
    334 			ctx->kind            = kind;
    335 			ctx->parameters      = params;
    336 			ctx->filter_slot     = filter_slot     % BeamformerFilterSlots;
    337 			ctx->parameter_block = parameter_block % BeamformerMaxParameterBlockSlots;
    338 			beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
    339 			result = 1;
    340 		}
    341 	}
    342 	return result;
    343 }
    344 
b32
beamformer_create_filter(BeamformerFilterKind kind, f32 *filter_parameters, u32 filter_parameter_count,
                         f32 sampling_frequency, b32 complex, u8 filter_slot, u8 parameter_block)
{
	/* Validates the filter kind and that the caller supplied exactly the
	 * number of f32 parameters that kind's struct expects, then queues
	 * creation on the beamformer. */
	b32 result = 0;
	if (lib_error_check(kind >= 0 && kind < BeamformerFilterKind_Count, BF_LIB_ERR_KIND_INVALID_FILTER_KIND)) {
		BeamformerFilterParameters fp = {.sampling_frequency = sampling_frequency, .complex = complex != 0};
		/* per-kind expected parameter struct sizes, generated from the filter list */
		#define X(kind, ...) sizeof(fp.kind),
		read_only local_persist u32 kind_sizes[] = {BEAMFORMER_FILTER_KIND_LIST(,)};
		#undef X
		if (lib_error_check(kind_sizes[kind] == sizeof(f32) * filter_parameter_count,
		                    BF_LIB_ERR_KIND_INVALID_FILTER_PARAM_COUNT))
		{
			/* NOTE(rnp): any filter kind struct works as base offset of union */
			mem_copy(&fp.Kaiser, filter_parameters, kind_sizes[kind]);
			result = beamformer_create_filter_base(kind, fp, filter_slot, parameter_block);
		}
	}
	return result;
}
    365 
    366 function b32
    367 beamformer_flush_commands(i32 timeout_ms)
    368 {
    369 	b32 result = lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, timeout_ms);
    370 	return result;
    371 }
    372 
function b32
beamformer_compute_indirect(BeamformerViewPlaneTag tag, u32 block)
{
	/* Queues a compute pass over the given view plane using parameters from
	 * block, then issues a best-effort (zero timeout) flush. */
	b32 result = 0;
	if (check_shared_memory() &&
	    lib_error_check(tag   < BeamformerViewPlaneTag_Count, BF_LIB_ERR_KIND_INVALID_IMAGE_PLANE) &&
	    lib_error_check(block < g_beamformer_library_context.bp->reserved_parameter_blocks,
	                    BF_LIB_ERR_KIND_PARAMETER_BLOCK_UNALLOCATED))
	{
		BeamformWork *work = try_push_work_queue();
		if (work) {
			work->kind = BeamformerWorkKind_ComputeIndirect;
			work->compute_indirect_context.view_plane      = tag;
			work->compute_indirect_context.parameter_block = block;
			beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
			/* non-blocking: failure here is ignored by design */
			beamformer_flush_commands(0);
			result = 1;
		}
	}
	return result;
}
    394 
    395 b32
    396 beamformer_start_compute(void)
    397 {
    398 	b32 result = beamformer_compute_indirect(0, 0);
    399 	return result;
    400 }
    401 
    402 b32
    403 beamformer_wait_for_compute_dispatch(i32 timeout_ms)
    404 {
    405 	b32 result = beamformer_flush_commands(timeout_ms);
    406 	/* NOTE(rnp): if you are calling this function you are probably about
    407 	 * to start some other work and it might be better to not do this... */
    408 	if (result) lib_release_lock(BeamformerSharedMemoryLockKind_DispatchCompute);
    409 	return result;
    410 }
    411 
/* NOTE: X(array name, element type, scalars per element, block region) for
 * each fixed-size array that can be uploaded into a parameter block */
#define BEAMFORMER_UPLOAD_FNS \
	X(channel_mapping, i16, 1, ChannelMapping) \
	X(sparse_elements, i16, 1, SparseElements) \
	X(focal_vectors,   f32, 2, FocalVectors)

/* Expands to beamformer_push_<name>_at(data, count, block): bounds-checks
 * count against the destination array, then uploads through
 * parameter_block_region_upload() with the library's global timeout. */
#define X(name, dtype, elements, region_name) \
b32 beamformer_push_##name ##_at(dtype *data, u32 count, u32 block) { \
	b32 result = 0; \
	if (lib_error_check(count <= countof(((BeamformerParameterBlock *)0)->name), BF_LIB_ERR_KIND_BUFFER_OVERFLOW)) { \
		result = parameter_block_region_upload(data, count * elements * sizeof(dtype), block, \
		                                       BeamformerParameterBlockRegion_##region_name,  \
		                                       offsetof(BeamformerParameterBlock, name),      \
		                                       g_beamformer_library_context.timeout_ms);      \
	} \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X
    430 
/* Expands to beamformer_push_<name>(data, count): forwards to the _at
 * variant targeting parameter block 0. */
#define X(name, dtype, ...) \
b32 beamformer_push_##name (dtype *data, u32 count) { \
	b32 result = beamformer_push_##name ##_at(data, count, 0); \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X
    438 
function b32
beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms)
{
	/* Copies raw RF data into the shared scratch arena. Takes UploadRF
	 * first (bounded by timeout_ms), then briefly holds ScratchSpace for
	 * the copy itself. */
	b32 result = 0;
	if (check_shared_memory()) {
		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
		if (lib_error_check(data_size <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_BUFFER_OVERFLOW)) {
			if (lib_try_lock(BeamformerSharedMemoryLockKind_UploadRF, timeout_ms)) {
				if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
					mem_copy(scratch.beg, data, data_size);
					/* TODO(rnp): need a better way to communicate this */
					g_beamformer_library_context.bp->scratch_rf_size = data_size;
					lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
					result = 1;
				}
				/* NOTE(review): UploadRF is not released here on either path;
				 * presumably the beamformer releases it after consuming the
				 * data - confirm against the consumer side */
			}
		}
	}
	return result;
}
    459 
    460 b32
    461 beamformer_push_data(void *data, u32 data_size)
    462 {
    463 	return beamformer_push_data_base(data, data_size, g_beamformer_library_context.timeout_ms);
    464 }
    465 
    466 b32
    467 beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, u32 parameter_slot)
    468 {
    469 	b32 result = beamformer_push_data_base(data, data_size, g_beamformer_library_context.timeout_ms);
    470 	if (result) result = beamformer_compute_indirect(image_plane_tag, parameter_slot);
    471 	return result;
    472 }
    473 
    474 b32
    475 beamformer_push_parameters_at(BeamformerParameters *bp, u32 block)
    476 {
    477 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), block,
    478 	                                           BeamformerParameterBlockRegion_Parameters,
    479 	                                           offsetof(BeamformerParameterBlock, parameters),
    480 	                                           g_beamformer_library_context.timeout_ms);
    481 	return result;
    482 }
    483 
    484 b32
    485 beamformer_push_parameters(BeamformerParameters *bp)
    486 {
    487 	b32 result = beamformer_push_parameters_at(bp, 0);
    488 	return result;
    489 }
    490 
b32
beamformer_push_simple_parameters_at(BeamformerSimpleParameters *bp, u32 block)
{
	/* Fans a simple parameter struct out into the individual block regions:
	 * base parameters, pipeline, channel mapping, optional sparse elements,
	 * focal vectors and per-stage shader parameters. Failures accumulate in
	 * result but later pushes are still attempted. */
	b32 result = validate_simple_parameters(bp);
	if (result) {
		result &= beamformer_push_parameters_at((BeamformerParameters *)bp, block);
		result &= beamformer_push_pipeline_at(bp->compute_stages, bp->compute_stages_count, (BeamformerDataKind)bp->data_kind, block);
		result &= beamformer_push_channel_mapping_at(bp->channel_mapping, bp->channel_count, block);
		/* sparse elements only apply to the UFORCES/UHERCULES DAS variants */
		if (bp->das_shader_id == BeamformerDASKind_UFORCES || bp->das_shader_id == BeamformerDASKind_UHERCULES)
			result &= beamformer_push_sparse_elements_at(bp->sparse_elements, bp->acquisition_count, block);

		/* interleave steering angle/focal depth pairs into v2 focal vectors */
		alignas(64) v2 focal_vectors[countof(bp->steering_angles)];
		for (u32 i = 0; i < countof(bp->steering_angles); i++)
			focal_vectors[i] = (v2){{bp->steering_angles[i], bp->focal_depths[i]}};
		result &= beamformer_push_focal_vectors_at((f32 *)focal_vectors, countof(focal_vectors), block);

		for (u32 stage = 0; stage < bp->compute_stages_count; stage++)
			result &= beamformer_set_pipeline_stage_parameters_at(stage, bp->compute_stage_parameters[stage], block);
	}
	return result;
}
    512 
    513 b32
    514 beamformer_push_simple_parameters(BeamformerSimpleParameters *bp)
    515 {
    516 	b32 result = beamformer_push_simple_parameters_at(bp, 0);
    517 	return result;
    518 }
    519 
    520 b32
    521 beamformer_push_parameters_ui(BeamformerUIParameters *bp)
    522 {
    523 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
    524 	                                           offsetof(BeamformerParameterBlock, parameters_ui),
    525 	                                           g_beamformer_library_context.timeout_ms);
    526 	return result;
    527 }
    528 
    529 b32
    530 beamformer_push_parameters_head(BeamformerParametersHead *bp)
    531 {
    532 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
    533 	                                           offsetof(BeamformerParameterBlock, parameters_head),
    534 	                                           g_beamformer_library_context.timeout_ms);
    535 	return result;
    536 }
    537 
function b32
beamformer_export_buffer(BeamformerExportContext export_context)
{
	/* Queues an export work item. ExportSync is taken here (zero timeout)
	 * and intentionally left held; beamformer_read_output() re-acquires and
	 * releases it once the exported data has been copied out. */
	BeamformWork *work = try_push_work_queue();
	b32 result = work && lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, 0);
	if (result) {
		work->export_context = export_context;
		work->kind = BeamformerWorkKind_ExportBuffer;
		work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
		beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
	}
	return result;
}
    551 
function b32
beamformer_read_output(void *out, iz size, i32 timeout_ms)
{
	/* Waits (up to timeout_ms) on ExportSync for the export to complete,
	 * then copies size bytes out of the scratch arena while briefly holding
	 * ScratchSpace. Both locks are released before returning. */
	b32 result = 0;
	if (lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
		if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
			Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
			mem_copy(out, scratch.beg, (uz)size);
			lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
			result = 1;
		}
		lib_release_lock(BeamformerSharedMemoryLockKind_ExportSync);
	}
	return result;
}
    567 
    568 b32
    569 beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t data_size,
    570                          void *out_data, int32_t timeout_ms)
    571 {
    572 	b32 result = validate_simple_parameters(bp);
    573 	if (result) {
    574 		bp->output_points[0] = MAX(1, bp->output_points[0]);
    575 		bp->output_points[1] = MAX(1, bp->output_points[1]);
    576 		bp->output_points[2] = MAX(1, bp->output_points[2]);
    577 
    578 		beamformer_push_simple_parameters(bp);
    579 
    580 		b32 complex = 0;
    581 		for (u32 stage = 0; stage < bp->compute_stages_count; stage++) {
    582 			BeamformerShaderKind shader = (BeamformerShaderKind)bp->compute_stages[stage];
    583 			complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_CudaHilbert;
    584 		}
    585 
    586 		iz output_size = bp->output_points[0] * bp->output_points[1] * bp->output_points[2] * (i32)sizeof(f32);
    587 		if (complex) output_size *= 2;
    588 
    589 		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
    590 		if (result && lib_error_check(output_size <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW)
    591 		    && beamformer_push_data_with_compute(data, data_size, 0, 0))
    592 		{
    593 			BeamformerExportContext export;
    594 			export.kind = BeamformerExportKind_BeamformedData;
    595 			export.size = (u32)output_size;
    596 			if (beamformer_export_buffer(export)) {
    597 				/* NOTE(rnp): if this fails it just means that the work from push_data hasn't
    598 				 * started yet. This is here to catch the other case where the work started
    599 				 * and finished before we finished queuing the export work item */
    600 				beamformer_flush_commands(0);
    601 
    602 				result = beamformer_read_output(out_data, output_size, timeout_ms);
    603 			}
    604 		}
    605 	}
    606 	return result;
    607 }
    608 
    609 b32
    610 beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
    611 {
    612 	static_assert(sizeof(*output) <= BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE,
    613 	              "timing table size exceeds scratch space");
    614 
    615 	b32 result = 0;
    616 	if (check_shared_memory()) {
    617 		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
    618 		if (lib_error_check(arena_capacity(&scratch, u8) <= (iz)sizeof(*output), BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW)) {
    619 			BeamformerExportContext export;
    620 			export.kind = BeamformerExportKind_Stats;
    621 			export.size = sizeof(*output);
    622 			if (beamformer_export_buffer(export) && beamformer_flush_commands(0))
    623 				result = beamformer_read_output(output, sizeof(*output), timeout_ms);
    624 		}
    625 	}
    626 	return result;
    627 }
    628 
i32
beamformer_live_parameters_get_dirty_flag(void)
{
	/* Pops the lowest set bit from live_imaging_dirty_flags and returns its
	 * index, or -1 when no flag is set or shared memory is unavailable. */
	i32 result = -1;
	if (check_shared_memory()) {
		u32 flag = ctz_u32(g_beamformer_library_context.bp->live_imaging_dirty_flags);
		/* flag == 32 indicates no set bit (assumed ctz_u32 convention for 0) */
		if (flag != 32) {
			/* clear atomically; the beamformer may be setting other flags */
			atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1u << flag));
			result = (i32)flag;
		}
	}
	return result;
}
    642 
    643 BeamformerLiveImagingParameters *
    644 beamformer_get_live_parameters(void)
    645 {
    646 	BeamformerLiveImagingParameters *result = 0;
    647 	if (check_shared_memory()) result = &g_beamformer_library_context.bp->live_imaging_parameters;
    648 	return result;
    649 }
    650 
    651 b32
    652 beamformer_set_live_parameters(BeamformerLiveImagingParameters *new)
    653 {
    654 	b32 result = 0;
    655 	if (check_shared_memory()) {
    656 		mem_copy(&g_beamformer_library_context.bp->live_imaging_parameters, new, sizeof(*new));
    657 		memory_write_barrier();
    658 		result = 1;
    659 	}
    660 	return result;
    661 }