ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

ogl_beamformer_lib.c (22920B)


      1 /* See LICENSE for license details. */
      2 #include "../compiler.h"
      3 
      4 #include "../util.h"
      5 #include "../generated/beamformer.meta.c"
      6 #include "../beamformer_parameters.h"
      7 #include "ogl_beamformer_lib_base.h"
      8 
      9 #if OS_LINUX
     10 #include "../os_linux.c"
     11 #elif OS_WINDOWS
     12 #include "../os_win32.c"
     13 
     14 W32(iptr) OpenFileMappingA(u32, b32, c8 *);
     15 
     16 #else
     17 #error Unsupported Platform
     18 #endif
     19 
     20 #include "../beamformer_shared_memory.c"
     21 
/* NOTE: library-wide state shared by every public entry point; this is a
 * single unsynchronized global so the library is not thread-safe */
global struct {
	SharedMemoryRegion      shared_memory; /* mapping of the beamformer's shared memory */
	BeamformerSharedMemory *bp;            /* typed view of shared_memory.region; 0 until version-validated */
	i32                     timeout_ms;    /* lock timeout for library calls; >= -1 (see beamformer_set_global_timeout);
	                                        * -1 presumably means wait forever — TODO confirm against os lock impl */
	BeamformerLibErrorKind  last_error;    /* most recent failure recorded by lib_error_check() */
} g_beamformer_library_context;
     28 
     29 #if OS_LINUX
     30 
     31 function b32
     32 os_reserve_region_locks(iptr os_context, u32 count)
     33 {
     34 	b32 result = count <= BeamformerMaxParameterBlockSlots;
     35 	return result;
     36 }
     37 
     38 function SharedMemoryRegion
     39 os_open_shared_memory_area(char *name)
     40 {
     41 	SharedMemoryRegion result = {0};
     42 	i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR);
     43 	if (fd > 0) {
     44 		void *new = mmap(0, BEAMFORMER_SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
     45 		if (new != MAP_FAILED) result.region = new;
     46 		close(fd);
     47 	}
     48 	return result;
     49 }
     50 
     51 #elif OS_WINDOWS
     52 
/* Ensures one named Win32 semaphore exists per shared memory lock: the
 * fixed BeamformerSharedMemoryLockKind_Count locks plus one per reserved
 * parameter block slot. Grows or shrinks the reserved set to match the
 * requested count; returns 0 when count exceeds the block slot limit or
 * a semaphore could not be created. */
function b32
os_reserve_region_locks(iptr os_context, u32 count)
{
	/* storage persists across calls; ctx->semaphores aliases this table */
	local_persist iptr semaphores[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots];
	w32_shared_memory_context *ctx = (typeof(ctx))os_context;

	b32 result = count <= BeamformerMaxParameterBlockSlots;
	if (result) {
		/* fixed locks always precede the per-block locks in the table */
		count += BeamformerSharedMemoryLockKind_Count;
		if (count > ctx->reserved_count) {
			/* build the semaphore name prefix once; each iteration appends its index */
			u8 buffer[1024];
			Stream sb = {.data = buffer, .cap = countof(buffer)};
			stream_append_s8(&sb, s8(OS_SHARED_MEMORY_NAME "_lock_"));

			u32 new_reserved_count;
			for (new_reserved_count = ctx->reserved_count;
			     new_reserved_count < count && result;
			     new_reserved_count++)
			{
				Stream lb = sb;
				stream_append_u64(&lb, new_reserved_count);
				stream_append_byte(&lb, 0);
				/* binary semaphore (initial = max = 1) shared by name across processes */
				semaphores[new_reserved_count] = CreateSemaphoreA(0, 1, 1, (c8 *)lb.data);
				/* NOTE(review): CreateSemaphoreA reports failure with NULL, not
				 * INVALID_HANDLE_VALUE — confirm INVALID_FILE matches that here */
				result &= semaphores[new_reserved_count] != INVALID_FILE;
			}

			if (result) {
				ctx->semaphores     = semaphores;
				ctx->reserved_count = count;
			} else {
				/* roll back handles created by this call before reporting failure */
				for (u32 j = ctx->reserved_count; j < new_reserved_count; j++)
					CloseHandle(semaphores[j]);
			}
		} else if (count < ctx->reserved_count) {
			/* shrinking: close the now-unneeded trailing semaphores */
			for (u32 i = ctx->reserved_count; i > count;)
				CloseHandle(semaphores[--i]);
			ctx->reserved_count = count;
		}
	}
	return result;
}
     94 
/* Opens the beamformer's named file mapping, maps the full shared memory
 * region, and reserves lock semaphores for one parameter block slot.
 * Returns a zeroed region on failure. */
function SharedMemoryRegion
os_open_shared_memory_area(char *name)
{
	/* ctx must outlive the mapping; it is handed out via result.os_context */
	local_persist w32_shared_memory_context ctx = {0};
	SharedMemoryRegion result = {0};
	iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name);
	if (h != INVALID_FILE) {
		void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, BEAMFORMER_SHARED_MEMORY_SIZE);
		if (new && os_reserve_region_locks((iptr)&ctx, 1)) {
			result.region     = new;
			result.os_context = (iptr)&ctx;
		}
		/* NOTE(review): if lock reservation fails the mapped view is not
		 * unmapped here — confirm that leak is acceptable. Closing the
		 * mapping handle itself is fine; the view keeps the section alive. */
		CloseHandle(h);
	}
	return result;
}
    111 
    112 #endif
    113 
    114 function b32
    115 lib_error_check(b32 condition, BeamformerLibErrorKind error_kind)
    116 {
    117 	b32 result = condition;
    118 	if (!result) g_beamformer_library_context.last_error = error_kind;
    119 	return result;
    120 }
    121 
/* Lazily attaches to the beamformer's shared memory on first use and
 * validates the layout version. Returns 1 only when the mapping exists,
 * the version matches, and the region has not been marked invalid. */
function b32
check_shared_memory(void)
{
	if (!g_beamformer_library_context.shared_memory.region) {
		g_beamformer_library_context.shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME);
		if (lib_error_check(g_beamformer_library_context.shared_memory.region != 0, BF_LIB_ERR_KIND_SHARED_MEMORY)) {
			u32 version = ((BeamformerSharedMemory *)g_beamformer_library_context.shared_memory.region)->version;
			/* bp stays 0 on a version mismatch so every later call keeps failing */
			if (lib_error_check(version == BEAMFORMER_SHARED_MEMORY_VERSION, BF_LIB_ERR_KIND_VERSION_MISMATCH))
				g_beamformer_library_context.bp = g_beamformer_library_context.shared_memory.region;
		}
	}

	b32 result = 0;
	if (g_beamformer_library_context.bp)
		result = lib_error_check(!g_beamformer_library_context.bp->invalid, BF_LIB_ERR_KIND_INVALID_ACCESS);
	return result;
}
    139 
    140 function b32
    141 valid_parameter_block(u32 block)
    142 {
    143 	b32 result = check_shared_memory();
    144 	if (result) {
    145 		result = lib_error_check(block < g_beamformer_library_context.bp->reserved_parameter_blocks,
    146 		                         BF_LIB_ERR_KIND_PARAMETER_BLOCK_UNALLOCATED);
    147 	}
    148 	return result;
    149 }
    150 
    151 function BeamformWork *
    152 try_push_work_queue(void)
    153 {
    154 	BeamformWork *result = beamform_work_queue_push(&g_beamformer_library_context.bp->external_work_queue);
    155 	lib_error_check(result != 0, BF_LIB_ERR_KIND_WORK_QUEUE_FULL);
    156 	return result;
    157 }
    158 
    159 function b32
    160 lib_try_lock(i32 lock, i32 timeout_ms)
    161 {
    162 	b32 result = os_shared_memory_region_lock(&g_beamformer_library_context.shared_memory,
    163 	                                          g_beamformer_library_context.bp->locks,
    164 	                                          lock, (u32)timeout_ms);
    165 	lib_error_check(result, BF_LIB_ERR_KIND_SYNC_VARIABLE);
    166 	return result;
    167 }
    168 
    169 function void
    170 lib_release_lock(i32 lock)
    171 {
    172 	os_shared_memory_region_unlock(&g_beamformer_library_context.shared_memory,
    173 	                               g_beamformer_library_context.bp->locks, (i32)lock);
    174 }
    175 
    176 u32
    177 beamformer_get_api_version(void)
    178 {
    179 	return BEAMFORMER_SHARED_MEMORY_VERSION;
    180 }
    181 
/* Maps an error kind to a static human readable string. Out of range
 * kinds (including the Count sentinel) map to the trailing
 * "invalid error kind" entry. */
const char *
beamformer_error_string(BeamformerLibErrorKind kind)
{
	/* X-macro expands BEAMFORMER_LIB_ERRORS into one string per error kind,
	 * in declaration order, so kind indexes the table directly */
	#define X(type, num, string) string,
	local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"};
	#undef X
	return error_string_table[MIN(kind, countof(error_string_table) - 1)];
}
    190 
    191 BeamformerLibErrorKind
    192 beamformer_get_last_error(void)
    193 {
    194 	return g_beamformer_library_context.last_error;
    195 }
    196 
    197 const char *
    198 beamformer_get_last_error_string(void)
    199 {
    200 	return beamformer_error_string(beamformer_get_last_error());
    201 }
    202 
    203 b32
    204 beamformer_set_global_timeout(i32 timeout_ms)
    205 {
    206 	b32 result = lib_error_check(timeout_ms >= -1, BF_LIB_ERR_KIND_INVALID_TIMEOUT);
    207 	if (result) g_beamformer_library_context.timeout_ms = timeout_ms;
    208 	return result;
    209 }
    210 
/* Reserves count parameter block slots (and their OS lock objects).
 * Newly reserved blocks are zeroed; previously reserved blocks keep
 * their contents. Fails when count exceeds the compile time slot limit. */
b32
beamformer_reserve_parameter_blocks(uint32_t count)
{
	b32 result = 0;
	if (check_shared_memory() &&
	    lib_error_check(os_reserve_region_locks(g_beamformer_library_context.shared_memory.os_context, count),
	                    BF_LIB_ERR_KIND_PARAMETER_BLOCK_OVERFLOW))
	{
		u32 old_count = g_beamformer_library_context.bp->reserved_parameter_blocks;
		g_beamformer_library_context.bp->reserved_parameter_blocks = count;
		/* only blocks beyond the previous reservation start from a clean slate */
		for (u32 i = old_count; i < count; i++)
			zero_struct(beamformer_parameter_block(g_beamformer_library_context.bp, i));
		result = 1;
	}
	return result;
}
    227 
/* Validates a compute pipeline description: stage count within limits,
 * every stage a compute shader, the first stage either Decode or
 * Demodulate, and Demodulate only fed Int16/Float32 data. Records the
 * specific error kind on failure.
 * NOTE(review): shader_count == 0 still reads shaders[0] below —
 * confirm callers never pass an empty pipeline. */
function b32
validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
{
	b32 result = lib_error_check(shader_count <= BeamformerMaxComputeShaderStages, BF_LIB_ERR_KIND_COMPUTE_STAGE_OVERFLOW);
	if (result) {
		/* every stage must be within the compute shader enum range */
		for (u32 i = 0; i < shader_count; i++)
			result &= BETWEEN(shaders[i], BeamformerShaderKind_ComputeFirst, BeamformerShaderKind_ComputeLast);
		if (!result) {
			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_COMPUTE_STAGE;
		} else if (shaders[0] != BeamformerShaderKind_Demodulate &&
		           shaders[0] != BeamformerShaderKind_Decode)
		{
			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_START_SHADER;
			result = 0;
		} else if (shaders[0] == BeamformerShaderKind_Demodulate &&
		           !(data_kind == BeamformerDataKind_Int16 || data_kind == BeamformerDataKind_Float32))
		{
			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_DEMOD_DATA_KIND;
			result = 0;
		}
	}
	return result;
}
    251 
    252 function b32
    253 validate_simple_parameters(BeamformerSimpleParameters *bp)
    254 {
    255 	b32 result = check_shared_memory();
    256 	if (result) {
    257 		result &= bp->channel_count <= BeamformerMaxChannelCount;
    258 		if (!result)
    259 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_SIMPLE_PARAMETERS;
    260 	}
    261 	return result;
    262 }
    263 
/* Copies size bytes of data into parameter block `block` at byte offset
 * block_offset, under the block's lock, and marks region_id dirty so the
 * beamformer re-reads it. Fails when the block is unreserved or the lock
 * cannot be taken within timeout_ms. */
function b32
parameter_block_region_upload_explicit(void *data, u32 size, u32 block, BeamformerParameterBlockRegions region_id,
                                       u32 block_offset, i32 timeout_ms)
{
	/* per-block locks are indexed after the fixed lock kinds */
	i32 lock   = BeamformerSharedMemoryLockKind_Count + (i32)block;
	b32 result = valid_parameter_block(block) && lib_try_lock(lock, timeout_ms);
	if (result) {
		mem_copy((u8 *)beamformer_parameter_block(g_beamformer_library_context.bp, block) + block_offset,
		         data, size);
		mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block, region_id);
		lib_release_lock(lock);
	}
	return result;
}
    278 
    279 
    280 function b32
    281 parameter_block_region_upload(void *data, u32 size, u32 block,
    282                               BeamformerParameterBlockRegions region_id, i32 timeout_ms)
    283 {
    284 	assert(region_id < BeamformerParameterBlockRegion_Count);
    285 	b32 result = parameter_block_region_upload_explicit(data, size, block, region_id,
    286 	                                                    BeamformerParameterBlockRegionOffsets[region_id],
    287 	                                                    timeout_ms);
    288 	return result;
    289 }
    290 
/* Uploads one shader stage's parameters into the compute pipeline region
 * of the given parameter block. stage_index wraps modulo the stage limit
 * rather than failing on overflow. */
b32
beamformer_set_pipeline_stage_parameters_at(u32 stage_index, i32 parameter, u32 block)
{
	u32 offset  = BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_ComputePipeline];
	offset     += offsetof(BeamformerComputePipeline, parameters);
	offset     += (stage_index % BeamformerMaxComputeShaderStages) * sizeof(BeamformerShaderParameters);
	b32 result  = parameter_block_region_upload_explicit(&parameter, sizeof(BeamformerShaderParameters), block,
	                                                     BeamformerParameterBlockRegion_ComputePipeline, offset,
	                                                     g_beamformer_library_context.timeout_ms);
	return result;
}
    302 
    303 b32
    304 beamformer_set_pipeline_stage_parameters(u32 stage_index, i32 parameter)
    305 {
    306 	b32 result = beamformer_set_pipeline_stage_parameters_at(stage_index, parameter, 0);
    307 	return result;
    308 }
    309 
/* Replaces the compute pipeline of a parameter block: shader list, stage
 * count, and input data kind. Validates the pipeline first and takes the
 * block's lock for the copy. */
b32
beamformer_push_pipeline_at(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind, u32 block)
{
	b32 result = 0;
	if (check_shared_memory() && validate_pipeline(shaders, shader_count, data_kind)) {
		/* per-block locks are indexed after the fixed lock kinds */
		i32 lock = BeamformerSharedMemoryLockKind_Count + (i32)block;
		if (valid_parameter_block(block) && lib_try_lock(lock, g_beamformer_library_context.timeout_ms)) {
			BeamformerParameterBlock *b = beamformer_parameter_block(g_beamformer_library_context.bp, block);
			mem_copy(&b->pipeline.shaders, shaders, shader_count * sizeof(*shaders));
			mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block,
			                                  BeamformerParameterBlockRegion_ComputePipeline);
			b->pipeline.shader_count = shader_count;
			b->pipeline.data_kind    = data_kind;
			lib_release_lock(lock);
			result = 1;
		}
	}
	return result;
}
    329 
    330 b32
    331 beamformer_push_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
    332 {
    333 	b32 result = beamformer_push_pipeline_at(shaders, shader_count, data_kind, 0);
    334 	return result;
    335 }
    336 
/* Queues a CreateFilter work item for the beamformer. Slot indices wrap
 * modulo their limits instead of failing. Does not wait for the filter
 * to actually be created. */
function b32
beamformer_create_filter_base(BeamformerFilterKind kind, BeamformerFilterParameters params, u8 filter_slot, u8 parameter_block)
{
	b32 result = 0;
	if (check_shared_memory()) {
		BeamformWork *work = try_push_work_queue();
		if (work) {
			BeamformerCreateFilterContext *ctx = &work->create_filter_context;
			work->kind = BeamformerWorkKind_CreateFilter;
			ctx->kind            = kind;
			ctx->parameters      = params;
			ctx->filter_slot     = filter_slot     % BeamformerFilterSlots;
			ctx->parameter_block = parameter_block % BeamformerMaxParameterBlockSlots;
			beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
			result = 1;
		}
	}
	return result;
}
    356 
/* Public filter creation entry point. filter_parameters must hold
 * exactly the number of floats the chosen kind's parameter struct
 * contains; the per-kind sizes are generated from
 * BEAMFORMER_FILTER_KIND_LIST. */
b32
beamformer_create_filter(BeamformerFilterKind kind, f32 *filter_parameters, u32 filter_parameter_count,
                         f32 sampling_frequency, b32 complex, u8 filter_slot, u8 parameter_block)
{
	b32 result = 0;
	if (lib_error_check(kind >= 0 && kind < BeamformerFilterKind_Count, BF_LIB_ERR_KIND_INVALID_FILTER_KIND)) {
		BeamformerFilterParameters fp = {.sampling_frequency = sampling_frequency, .complex = complex != 0};
		/* table of sizeof(union member) per filter kind, in kind order */
		#define X(kind, ...) sizeof(fp.kind),
		read_only local_persist u32 kind_sizes[] = {BEAMFORMER_FILTER_KIND_LIST(,)};
		#undef X
		if (lib_error_check(kind_sizes[kind] == sizeof(f32) * filter_parameter_count,
		                    BF_LIB_ERR_KIND_INVALID_FILTER_PARAM_COUNT))
		{
			/* NOTE(rnp): any filter kind struct works as base offset of union */
			mem_copy(&fp.Kaiser, filter_parameters, kind_sizes[kind]);
			result = beamformer_create_filter_base(kind, fp, filter_slot, parameter_block);
		}
	}
	return result;
}
    377 
    378 function b32
    379 beamformer_flush_commands(i32 timeout_ms)
    380 {
    381 	b32 result = lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, timeout_ms);
    382 	return result;
    383 }
    384 
/* Queues an indirect compute dispatch for the given view plane and
 * parameter block, then nudges the beamformer with a non-blocking flush.
 * Fails on an invalid plane tag, unreserved block, or full work queue. */
function b32
beamformer_compute_indirect(BeamformerViewPlaneTag tag, u32 block)
{
	b32 result = 0;
	if (check_shared_memory() &&
	    lib_error_check(tag   < BeamformerViewPlaneTag_Count, BF_LIB_ERR_KIND_INVALID_IMAGE_PLANE) &&
	    lib_error_check(block < g_beamformer_library_context.bp->reserved_parameter_blocks,
	                    BF_LIB_ERR_KIND_PARAMETER_BLOCK_UNALLOCATED))
	{
		BeamformWork *work = try_push_work_queue();
		if (work) {
			work->kind = BeamformerWorkKind_ComputeIndirect;
			work->compute_indirect_context.view_plane      = tag;
			work->compute_indirect_context.parameter_block = block;
			beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
			/* non-blocking kick; failure just means the beamformer is busy */
			beamformer_flush_commands(0);
			result = 1;
		}
	}
	return result;
}
    406 
    407 b32
    408 beamformer_start_compute(void)
    409 {
    410 	b32 result = beamformer_compute_indirect(0, 0);
    411 	return result;
    412 }
    413 
/* Blocks (up to timeout_ms) until the beamformer has picked up queued
 * commands, by acquiring and then immediately releasing the
 * DispatchCompute lock. */
b32
beamformer_wait_for_compute_dispatch(i32 timeout_ms)
{
	b32 result = beamformer_flush_commands(timeout_ms);
	/* NOTE(rnp): if you are calling this function you are probably about
	 * to start some other work and it might be better to not do this... */
	if (result) lib_release_lock(BeamformerSharedMemoryLockKind_DispatchCompute);
	return result;
}
    423 
/* X(name, element type, elements per item, parameter block region):
 * table of array-upload entry points generated by the expansions below */
#define BEAMFORMER_UPLOAD_FNS \
	X(channel_mapping, i16, 1, ChannelMapping) \
	X(sparse_elements, i16, 1, SparseElements) \
	X(focal_vectors,   f32, 2, FocalVectors)

/* beamformer_push_<name>_at(data, count, block): bounds-checks count
 * against the destination array in BeamformerParameterBlock, then
 * uploads into the matching region of the given parameter block */
#define X(name, dtype, elements, region_name) \
b32 beamformer_push_##name ##_at(dtype *data, u32 count, u32 block) { \
	b32 result = 0; \
	if (lib_error_check(count <= countof(((BeamformerParameterBlock *)0)->name), BF_LIB_ERR_KIND_BUFFER_OVERFLOW)) { \
		result = parameter_block_region_upload(data, count * elements * sizeof(dtype), block, \
		                                       BeamformerParameterBlockRegion_##region_name,  \
		                                       g_beamformer_library_context.timeout_ms);      \
	} \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X

/* beamformer_push_<name>(data, count): same upload on parameter block 0 */
#define X(name, dtype, ...) \
b32 beamformer_push_##name (dtype *data, u32 count) { \
	b32 result = beamformer_push_##name ##_at(data, count, 0); \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X
    449 
/* Copies raw RF data into the shared scratch space. Takes UploadRF
 * (bounded by timeout_ms) then ScratchSpace (non-blocking) before
 * copying. Fails when data_size exceeds the scratch capacity or a lock
 * cannot be taken.
 * NOTE(review): UploadRF is never released on any path here — presumably
 * the beamformer releases it after consuming the data, but when the
 * ScratchSpace lock fails UploadRF appears to stay held; confirm. */
function b32
beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms)
{
	b32 result = 0;
	if (check_shared_memory()) {
		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
		if (lib_error_check(data_size <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_BUFFER_OVERFLOW)) {
			if (lib_try_lock(BeamformerSharedMemoryLockKind_UploadRF, timeout_ms)) {
				if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
					mem_copy(scratch.beg, data, data_size);
					/* TODO(rnp): need a better way to communicate this */
					g_beamformer_library_context.bp->scratch_rf_size = data_size;
					lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
					result = 1;
				}
			}
		}
	}
	return result;
}
    470 
    471 b32
    472 beamformer_push_data(void *data, u32 data_size)
    473 {
    474 	return beamformer_push_data_base(data, data_size, g_beamformer_library_context.timeout_ms);
    475 }
    476 
    477 b32
    478 beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, u32 parameter_slot)
    479 {
    480 	b32 result = beamformer_push_data_base(data, data_size, g_beamformer_library_context.timeout_ms);
    481 	if (result) result = beamformer_compute_indirect(image_plane_tag, parameter_slot);
    482 	return result;
    483 }
    484 
    485 b32
    486 beamformer_push_parameters_at(BeamformerParameters *bp, u32 block)
    487 {
    488 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), block,
    489 	                                           BeamformerParameterBlockRegion_Parameters,
    490 	                                           g_beamformer_library_context.timeout_ms);
    491 	return result;
    492 }
    493 
    494 b32
    495 beamformer_push_parameters(BeamformerParameters *bp)
    496 {
    497 	b32 result = beamformer_push_parameters_at(bp, 0);
    498 	return result;
    499 }
    500 
/* Uploads a full simple parameter set to a parameter block: base
 * parameters, pipeline, channel mapping, sparse elements (sparse DAS
 * kinds only), focal vectors (packed from steering angles and focal
 * depths), and per-stage shader parameters. Returns 1 only when every
 * upload succeeded. */
b32
beamformer_push_simple_parameters_at(BeamformerSimpleParameters *bp, u32 block)
{
	b32 result = validate_simple_parameters(bp);
	if (result) {
		result &= beamformer_push_parameters_at((BeamformerParameters *)bp, block);
		result &= beamformer_push_pipeline_at(bp->compute_stages, bp->compute_stages_count, (BeamformerDataKind)bp->data_kind, block);
		result &= beamformer_push_channel_mapping_at(bp->channel_mapping, bp->channel_count, block);
		if (bp->das_shader_id == BeamformerDASKind_UFORCES || bp->das_shader_id == BeamformerDASKind_UHERCULES)
			result &= beamformer_push_sparse_elements_at(bp->sparse_elements, bp->acquisition_count, block);

		/* interleave (steering angle, focal depth) pairs as v2 focal vectors */
		alignas(64) v2 focal_vectors[countof(bp->steering_angles)];
		for (u32 i = 0; i < countof(bp->steering_angles); i++)
			focal_vectors[i] = (v2){{bp->steering_angles[i], bp->focal_depths[i]}};
		result &= beamformer_push_focal_vectors_at((f32 *)focal_vectors, countof(focal_vectors), block);

		for (u32 stage = 0; stage < bp->compute_stages_count; stage++)
			result &= beamformer_set_pipeline_stage_parameters_at(stage, bp->compute_stage_parameters[stage], block);
	}
	return result;
}
    522 
    523 b32
    524 beamformer_push_simple_parameters(BeamformerSimpleParameters *bp)
    525 {
    526 	b32 result = beamformer_push_simple_parameters_at(bp, 0);
    527 	return result;
    528 }
    529 
/* Uploads only the UI-adjustable subset of parameters into block 0,
 * writing directly at the parameters_ui offset within the block. */
b32
beamformer_push_parameters_ui(BeamformerUIParameters *bp)
{
	b32 result = parameter_block_region_upload_explicit(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
	                                                    offsetof(BeamformerParameterBlock, parameters_ui),
	                                                    g_beamformer_library_context.timeout_ms);
	return result;
}
    538 
/* Uploads only the head subset of parameters into block 0, writing
 * directly at the parameters_head offset within the block. */
b32
beamformer_push_parameters_head(BeamformerParametersHead *bp)
{
	b32 result = parameter_block_region_upload_explicit(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
	                                                    offsetof(BeamformerParameterBlock, parameters_head),
	                                                    g_beamformer_library_context.timeout_ms);
	return result;
}
    547 
/* Queues an ExportBuffer work item and takes the ExportSync lock, which
 * beamformer_read_output() later waits on to know the data is ready.
 * NOTE(review): when the queue push succeeds but the lock cannot be
 * taken, the popped slot is left uncommitted — confirm that is benign. */
function b32
beamformer_export_buffer(BeamformerExportContext export_context)
{
	BeamformWork *work = try_push_work_queue();
	b32 result = work && lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, 0);
	if (result) {
		work->export_context = export_context;
		work->kind = BeamformerWorkKind_ExportBuffer;
		work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
		beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
	}
	return result;
}
    561 
/* Waits (up to timeout_ms) for the beamformer to signal export
 * completion via the ExportSync lock, then copies size bytes out of the
 * shared scratch space while holding the ScratchSpace lock. */
function b32
beamformer_read_output(void *out, iz size, i32 timeout_ms)
{
	b32 result = 0;
	if (lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
		if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
			Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
			mem_copy(out, scratch.beg, (uz)size);
			lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
			result = 1;
		}
		lib_release_lock(BeamformerSharedMemoryLockKind_ExportSync);
	}
	return result;
}
    577 
/* One-shot synchronous beamforming: pushes parameters and raw data,
 * queues an export of the beamformed volume, and blocks until the result
 * is copied into out_data (or timeout_ms expires). out_data must hold
 * output_points[0]*[1]*[2] f32 values, twice that when any pipeline
 * stage produces complex data (Demodulate / CudaHilbert). */
b32
beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t data_size,
                         void *out_data, int32_t timeout_ms)
{
	b32 result = validate_simple_parameters(bp);
	if (result) {
		/* clamp the requested output grid so the computed size is never 0 */
		bp->output_points[0] = MAX(1, bp->output_points[0]);
		bp->output_points[1] = MAX(1, bp->output_points[1]);
		bp->output_points[2] = MAX(1, bp->output_points[2]);

		beamformer_push_simple_parameters(bp);

		/* complex pipelines export interleaved (re, im) pairs, doubling size */
		b32 complex = 0;
		for (u32 stage = 0; stage < bp->compute_stages_count; stage++) {
			BeamformerShaderKind shader = (BeamformerShaderKind)bp->compute_stages[stage];
			complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_CudaHilbert;
		}

		iz output_size = bp->output_points[0] * bp->output_points[1] * bp->output_points[2] * (i32)sizeof(f32);
		if (complex) output_size *= 2;

		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
		if (result && lib_error_check(output_size <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW)
		    && beamformer_push_data_with_compute(data, data_size, 0, 0))
		{
			BeamformerExportContext export;
			export.kind = BeamformerExportKind_BeamformedData;
			export.size = (u32)output_size;
			if (beamformer_export_buffer(export)) {
				/* NOTE(rnp): if this fails it just means that the work from push_data hasn't
				 * started yet. This is here to catch the other case where the work started
				 * and finished before we finished queuing the export work item */
				beamformer_flush_commands(0);

				result = beamformer_read_output(out_data, output_size, timeout_ms);
			}
		}
	}
	return result;
}
    618 
    619 b32
    620 beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
    621 {
    622 	static_assert(sizeof(*output) <= BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE,
    623 	              "timing table size exceeds scratch space");
    624 
    625 	b32 result = 0;
    626 	if (check_shared_memory()) {
    627 		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
    628 		if (lib_error_check(arena_capacity(&scratch, u8) <= (iz)sizeof(*output), BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW)) {
    629 			BeamformerExportContext export;
    630 			export.kind = BeamformerExportKind_Stats;
    631 			export.size = sizeof(*output);
    632 			if (beamformer_export_buffer(export) && beamformer_flush_commands(0))
    633 				result = beamformer_read_output(output, sizeof(*output), timeout_ms);
    634 		}
    635 	}
    636 	return result;
    637 }
    638 
    639 i32
    640 beamformer_live_parameters_get_dirty_flag(void)
    641 {
    642 	i32 result = -1;
    643 	if (check_shared_memory()) {
    644 		u32 flag = ctz_u32(g_beamformer_library_context.bp->live_imaging_dirty_flags);
    645 		if (flag != 32) {
    646 			atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1 << flag));
    647 			result = (i32)flag;
    648 		}
    649 	}
    650 	return result;
    651 }
    652 
    653 BeamformerLiveImagingParameters *
    654 beamformer_get_live_parameters(void)
    655 {
    656 	BeamformerLiveImagingParameters *result = 0;
    657 	if (check_shared_memory()) result = &g_beamformer_library_context.bp->live_imaging_parameters;
    658 	return result;
    659 }
    660 
/* Copies a complete live imaging parameter struct into shared memory.
 * The write barrier orders the copy before any subsequent writes from
 * this thread become visible to the beamformer. */
b32
beamformer_set_live_parameters(BeamformerLiveImagingParameters *new)
{
	b32 result = 0;
	if (check_shared_memory()) {
		mem_copy(&g_beamformer_library_context.bp->live_imaging_parameters, new, sizeof(*new));
		memory_write_barrier();
		result = 1;
	}
	return result;
}