ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

ogl_beamformer_lib.c (23477B)


      1 /* See LICENSE for license details. */
      2 #include "../compiler.h"
      3 
      4 #include "../util.h"
      5 #include "../generated/beamformer.meta.c"
      6 #include "../beamformer_parameters.h"
      7 #include "ogl_beamformer_lib_base.h"
      8 
      9 #if OS_LINUX
     10 #include "../os_linux.c"
     11 #elif OS_WINDOWS
     12 #include "../os_win32.c"
     13 
     14 W32(iptr) OpenFileMappingA(u32, b32, c8 *);
     15 
     16 #else
     17 #error Unsupported Platform
     18 #endif
     19 
     20 #include "../beamformer_shared_memory.c"
     21 
/* NOTE(review): process-global library state shared by every public entry
 * point; nothing here is synchronized, so concurrent use of this library from
 * multiple threads of one process looks unsupported — confirm with callers. */
global struct {
	SharedMemoryRegion      shared_memory; /* mapping of the beamformer's shared memory area */
	BeamformerSharedMemory *bp;            /* typed view of shared_memory.region, set after validation */
	i32                     timeout_ms;    /* default lock timeout used by the convenience wrappers */
	BeamformerLibErrorKind  last_error;    /* most recent failure recorded by lib_error_check() */
} g_beamformer_library_context;
     28 
     29 #if OS_LINUX
     30 
     31 function b32
     32 os_reserve_region_locks(iptr os_context, u32 count)
     33 {
     34 	b32 result = count <= BeamformerMaxParameterBlockSlots;
     35 	return result;
     36 }
     37 
     38 function SharedMemoryRegion
     39 os_open_shared_memory_area(char *name)
     40 {
     41 	SharedMemoryRegion result = {0};
     42 	i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR);
     43 	if (fd > 0) {
     44 		void *new = mmap(0, BEAMFORMER_SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
     45 		if (new != MAP_FAILED) result.region = new;
     46 		close(fd);
     47 	}
     48 	return result;
     49 }
     50 
     51 #elif OS_WINDOWS
     52 
/* Reserves named Win32 semaphores backing the shared memory locks: one per
 * BeamformerSharedMemoryLockKind plus one per requested parameter block.
 * Grows or shrinks the reservation to match `count` and returns 0 on any
 * failure (after closing the handles created during this call). */
function b32
os_reserve_region_locks(iptr os_context, u32 count)
{
	/* storage is local_persist so the handle array outlives this call;
	 * ctx->semaphores points into it once a reservation succeeds */
	local_persist iptr semaphores[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots];
	w32_shared_memory_context *ctx = (typeof(ctx))os_context;

	b32 result = count <= BeamformerMaxParameterBlockSlots;
	if (result) {
		/* count is from here on the total semaphore count, not just blocks */
		count += BeamformerSharedMemoryLockKind_Count;
		if (count > ctx->reserved_count) {
			/* build the common name prefix once; each iteration appends
			 * its index to a copy of this stream */
			u8 buffer[1024];
			Stream sb = {.data = buffer, .cap = countof(buffer)};
			stream_append_s8(&sb, s8(OS_SHARED_MEMORY_NAME "_lock_"));

			u32 new_reserved_count;
			for (new_reserved_count = ctx->reserved_count;
			     new_reserved_count < count && result;
			     new_reserved_count++)
			{
				Stream lb = sb;
				stream_append_u64(&lb, new_reserved_count);
				stream_append_byte(&lb, 0);
				/* NOTE(review): per Win32 docs CreateSemaphoreA returns
				 * NULL (not INVALID_HANDLE_VALUE) on failure — verify
				 * what INVALID_FILE expands to in os_win32.c; if it is
				 * INVALID_HANDLE_VALUE this failure check never fires */
				semaphores[new_reserved_count] = CreateSemaphoreA(0, 1, 1, (c8 *)lb.data);
				result &= semaphores[new_reserved_count] != INVALID_FILE;
			}

			if (result) {
				ctx->semaphores     = semaphores;
				ctx->reserved_count = count;
			} else {
				/* undo only the handles created by this call; the failed
				 * slot (new_reserved_count - 1) is closed here too */
				for (u32 j = ctx->reserved_count; j < new_reserved_count; j++)
					CloseHandle(semaphores[j]);
			}
		} else if (count < ctx->reserved_count) {
			/* shrinking: release the excess handles from the top down */
			for (u32 i = ctx->reserved_count; i > count;)
				CloseHandle(semaphores[--i]);
			ctx->reserved_count = count;
		}
	}
	return result;
}
     94 
/* Opens and maps the beamformer's shared memory on Windows and reserves the
 * base set of lock semaphores. Returns a zeroed region on failure. */
function SharedMemoryRegion
os_open_shared_memory_area(char *name)
{
	/* local_persist: one lock-reservation context per process */
	local_persist w32_shared_memory_context ctx = {0};
	SharedMemoryRegion result = {0};
	iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name);
	if (h != INVALID_FILE) {
		void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, BEAMFORMER_SHARED_MEMORY_SIZE);
		/* reserve locks for one parameter block up front */
		if (new && os_reserve_region_locks((iptr)&ctx, 1)) {
			result.region     = new;
			result.os_context = (iptr)&ctx;
		}
		/* NOTE(review): if the view mapped but lock reservation failed,
		 * `new` is never UnmapViewOfFile'd — looks like a leak; confirm */
		CloseHandle(h); /* the view keeps the mapping alive */
	}
	return result;
}
    111 
    112 #endif
    113 
    114 function b32
    115 lib_error_check(b32 condition, BeamformerLibErrorKind error_kind)
    116 {
    117 	b32 result = condition;
    118 	if (!result) g_beamformer_library_context.last_error = error_kind;
    119 	assert(result);
    120 	return result;
    121 }
    122 
    123 function b32
    124 check_shared_memory(void)
    125 {
    126 	if (!g_beamformer_library_context.shared_memory.region) {
    127 		g_beamformer_library_context.shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME);
    128 		if (lib_error_check(g_beamformer_library_context.shared_memory.region != 0, BF_LIB_ERR_KIND_SHARED_MEMORY)) {
    129 			u32 version = ((BeamformerSharedMemory *)g_beamformer_library_context.shared_memory.region)->version;
    130 			if (lib_error_check(version == BEAMFORMER_SHARED_MEMORY_VERSION, BF_LIB_ERR_KIND_VERSION_MISMATCH))
    131 				g_beamformer_library_context.bp = g_beamformer_library_context.shared_memory.region;
    132 		}
    133 	}
    134 
    135 	b32 result = 0;
    136 	if (g_beamformer_library_context.bp)
    137 		result = lib_error_check(!g_beamformer_library_context.bp->invalid, BF_LIB_ERR_KIND_INVALID_ACCESS);
    138 	return result;
    139 }
    140 
    141 function b32
    142 valid_parameter_block(u32 block)
    143 {
    144 	b32 result = check_shared_memory();
    145 	if (result) {
    146 		result = lib_error_check(block < g_beamformer_library_context.bp->reserved_parameter_blocks,
    147 		                         BF_LIB_ERR_KIND_PARAMETER_BLOCK_UNALLOCATED);
    148 	}
    149 	return result;
    150 }
    151 
    152 function BeamformWork *
    153 try_push_work_queue(void)
    154 {
    155 	BeamformWork *result = beamform_work_queue_push(&g_beamformer_library_context.bp->external_work_queue);
    156 	lib_error_check(result != 0, BF_LIB_ERR_KIND_WORK_QUEUE_FULL);
    157 	return result;
    158 }
    159 
    160 function b32
    161 lib_try_lock(i32 lock, i32 timeout_ms)
    162 {
    163 	b32 result = os_shared_memory_region_lock(&g_beamformer_library_context.shared_memory,
    164 	                                          g_beamformer_library_context.bp->locks,
    165 	                                          lock, (u32)timeout_ms);
    166 	lib_error_check(result, BF_LIB_ERR_KIND_SYNC_VARIABLE);
    167 	return result;
    168 }
    169 
    170 function void
    171 lib_release_lock(i32 lock)
    172 {
    173 	os_shared_memory_region_unlock(&g_beamformer_library_context.shared_memory,
    174 	                               g_beamformer_library_context.bp->locks, (i32)lock);
    175 }
    176 
    177 u32
    178 beamformer_get_api_version(void)
    179 {
    180 	return BEAMFORMER_SHARED_MEMORY_VERSION;
    181 }
    182 
    183 const char *
    184 beamformer_error_string(BeamformerLibErrorKind kind)
    185 {
    186 	#define X(type, num, string) string,
    187 	local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"};
    188 	#undef X
    189 	return error_string_table[MIN(kind, countof(error_string_table) - 1)];
    190 }
    191 
    192 BeamformerLibErrorKind
    193 beamformer_get_last_error(void)
    194 {
    195 	return g_beamformer_library_context.last_error;
    196 }
    197 
    198 const char *
    199 beamformer_get_last_error_string(void)
    200 {
    201 	return beamformer_error_string(beamformer_get_last_error());
    202 }
    203 
    204 b32
    205 beamformer_set_global_timeout(i32 timeout_ms)
    206 {
    207 	b32 result = lib_error_check(timeout_ms >= -1, BF_LIB_ERR_KIND_INVALID_TIMEOUT);
    208 	if (result) g_beamformer_library_context.timeout_ms = timeout_ms;
    209 	return result;
    210 }
    211 
    212 b32
    213 beamformer_reserve_parameter_blocks(uint32_t count)
    214 {
    215 	b32 result = 0;
    216 	if (check_shared_memory() &&
    217 	    lib_error_check(os_reserve_region_locks(g_beamformer_library_context.shared_memory.os_context, count),
    218 	                    BF_LIB_ERR_KIND_PARAMETER_BLOCK_OVERFLOW))
    219 	{
    220 		u32 old_count = g_beamformer_library_context.bp->reserved_parameter_blocks;
    221 		g_beamformer_library_context.bp->reserved_parameter_blocks = count;
    222 		for (u32 i = old_count; i < count; i++)
    223 			zero_struct(beamformer_parameter_block(g_beamformer_library_context.bp, i));
    224 		result = 1;
    225 	}
    226 	return result;
    227 }
    228 
    229 function b32
    230 validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
    231 {
    232 	b32 result = lib_error_check(shader_count <= BeamformerMaxComputeShaderStages, BF_LIB_ERR_KIND_COMPUTE_STAGE_OVERFLOW);
    233 	if (result) {
    234 		for (u32 i = 0; i < shader_count; i++)
    235 			result &= BETWEEN(shaders[i], BeamformerShaderKind_ComputeFirst, BeamformerShaderKind_ComputeLast);
    236 		if (!result) {
    237 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_COMPUTE_STAGE;
    238 		} else if (shaders[0] != BeamformerShaderKind_Demodulate &&
    239 		           shaders[0] != BeamformerShaderKind_Decode)
    240 		{
    241 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_START_SHADER;
    242 			result = 0;
    243 		} else if (shaders[0] == BeamformerShaderKind_Demodulate &&
    244 		           !(data_kind == BeamformerDataKind_Int16 || data_kind == BeamformerDataKind_Float32))
    245 		{
    246 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_DEMOD_DATA_KIND;
    247 			result = 0;
    248 		}
    249 	}
    250 	return result;
    251 }
    252 
    253 function b32
    254 validate_simple_parameters(BeamformerSimpleParameters *bp)
    255 {
    256 	b32 result = check_shared_memory();
    257 	if (result) {
    258 		result &= bp->channel_count <= BeamformerMaxChannelCount;
    259 		if (!result)
    260 			g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_SIMPLE_PARAMETERS;
    261 	}
    262 	return result;
    263 }
    264 
    265 function b32
    266 parameter_block_region_upload(void *data, u32 size, u32 block, BeamformerParameterBlockRegions region_id,
    267                               u32 block_offset, i32 timeout_ms)
    268 {
    269 	i32 lock   = BeamformerSharedMemoryLockKind_Count + (i32)block;
    270 	b32 result = valid_parameter_block(block) && lib_try_lock(lock, timeout_ms);
    271 	if (result) {
    272 		mem_copy((u8 *)beamformer_parameter_block(g_beamformer_library_context.bp, block) + block_offset,
    273 		         data, size);
    274 		mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block, region_id);
    275 		lib_release_lock(lock);
    276 	}
    277 	return result;
    278 }
    279 
    280 b32
    281 beamformer_set_pipeline_stage_parameters_at(u32 stage_index, i32 parameter, u32 block)
    282 {
    283 	u32 offset  = BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_ComputePipeline];
    284 	offset     += offsetof(BeamformerComputePipeline, parameters);
    285 	offset     += (stage_index % BeamformerMaxComputeShaderStages) * sizeof(BeamformerShaderParameters);
    286 	b32 result  = parameter_block_region_upload(&parameter, sizeof(BeamformerShaderParameters), block,
    287 	                                            BeamformerParameterBlockRegion_ComputePipeline, offset,
    288 	                                            g_beamformer_library_context.timeout_ms);
    289 	return result;
    290 }
    291 
    292 b32
    293 beamformer_set_pipeline_stage_parameters(u32 stage_index, i32 parameter)
    294 {
    295 	b32 result = beamformer_set_pipeline_stage_parameters_at(stage_index, parameter, 0);
    296 	return result;
    297 }
    298 
    299 b32
    300 beamformer_push_pipeline_at(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind, u32 block)
    301 {
    302 	b32 result = 0;
    303 	if (check_shared_memory() && validate_pipeline(shaders, shader_count, data_kind)) {
    304 		i32 lock = BeamformerSharedMemoryLockKind_Count + (i32)block;
    305 		if (valid_parameter_block(block) && lib_try_lock(lock, g_beamformer_library_context.timeout_ms)) {
    306 			BeamformerParameterBlock *b = beamformer_parameter_block(g_beamformer_library_context.bp, block);
    307 			mem_copy(&b->pipeline.shaders, shaders, shader_count * sizeof(*shaders));
    308 			mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block,
    309 			                                  BeamformerParameterBlockRegion_ComputePipeline);
    310 			b->pipeline.shader_count = shader_count;
    311 			b->pipeline.data_kind    = data_kind;
    312 			lib_release_lock(lock);
    313 			result = 1;
    314 		}
    315 	}
    316 	return result;
    317 }
    318 
    319 b32
    320 beamformer_push_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
    321 {
    322 	b32 result = beamformer_push_pipeline_at(shaders, shader_count, data_kind, 0);
    323 	return result;
    324 }
    325 
    326 function b32
    327 beamformer_create_filter_base(BeamformerFilterParameters params, u8 filter_slot, u8 parameter_block)
    328 {
    329 	b32 result = 0;
    330 	if (check_shared_memory()) {
    331 		BeamformWork *work = try_push_work_queue();
    332 		if (work) {
    333 			BeamformerCreateFilterContext *ctx = &work->create_filter_context;
    334 			work->kind = BeamformerWorkKind_CreateFilter;
    335 			ctx->parameters      = params;
    336 			ctx->filter_slot     = filter_slot     % BeamformerFilterSlots;
    337 			ctx->parameter_block = parameter_block % BeamformerMaxParameterBlockSlots;
    338 			beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
    339 			result = 1;
    340 		}
    341 	}
    342 	return result;
    343 }
    344 
    345 b32
    346 beamformer_create_filter(BeamformerFilterKind kind, void *filter_parameters, u32 filter_size,
    347                          f32 sampling_frequency, b32 complex, u8 filter_slot, u8 parameter_block)
    348 {
    349 	b32 result = 0;
    350 	if (lib_error_check(kind >= 0 && kind < BeamformerFilterKind_Count, BF_LIB_ERR_KIND_INVALID_FILTER_KIND)) {
    351 		BeamformerFilterParameters fp = {0};
    352 		/* NOTE(rnp): any parameter struct works as base offset */
    353 		filter_size = MIN(filter_size, sizeof(fp) - offsetof(BeamformerFilterParameters, kaiser));
    354 		mem_copy(&fp.kaiser, filter_parameters, filter_size);
    355 		fp.kind               = kind;
    356 		fp.complex            = complex != 0;
    357 		fp.sampling_frequency = sampling_frequency;
    358 		result = beamformer_create_filter_base(fp, filter_slot, parameter_block);
    359 	}
    360 	return result;
    361 }
    362 
/* Nudges the beamformer to process queued work by contending (with a zero
 * timeout) on the DispatchCompute lock. The lock is intentionally not
 * released here and the result is ignored — presumably the beamformer side
 * owns this lock as a wakeup signal (NOTE(review): confirm against the
 * beamformer's dispatch loop). */
function void
beamformer_flush_commands(void)
{
	i32 lock = BeamformerSharedMemoryLockKind_DispatchCompute;
	os_shared_memory_region_lock(&g_beamformer_library_context.shared_memory,
	                             g_beamformer_library_context.bp->locks, lock, 0);
}
    370 
/* Table of simple per-region upload entry points generated below.
 * Columns: X(field name in BeamformerParameterBlock, element C type,
 *            scalar elements per entry, parameter block region name). */
#define BEAMFORMER_UPLOAD_FNS \
	X(channel_mapping,               i16, 1, ChannelMapping) \
	X(focal_vectors,                 f32, 2, FocalVectors)   \
	X(sparse_elements,               i16, 1, SparseElements) \
	X(transmit_receive_orientations, u8,  1, TransmitReceiveOrientations)

/* Generates beamformer_push_<name>_at(data, count, block): bounds-checks
 * `count` against the destination array in BeamformerParameterBlock, then
 * uploads count * elements entries into the named region of `block` using
 * the global timeout. */
#define X(name, dtype, elements, region_name) \
b32 beamformer_push_##name ##_at(dtype *data, u32 count, u32 block) { \
	b32 result = 0; \
	if (lib_error_check(count <= countof(((BeamformerParameterBlock *)0)->name), BF_LIB_ERR_KIND_BUFFER_OVERFLOW)) { \
		result = parameter_block_region_upload(data, count * elements * sizeof(dtype), block, \
		                                       BeamformerParameterBlockRegion_##region_name,  \
		                                       offsetof(BeamformerParameterBlock, name),      \
		                                       g_beamformer_library_context.timeout_ms);      \
	} \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X
    390 
/* Generates beamformer_push_<name>(data, count): convenience wrappers for
 * the _at variants above, targeting parameter block 0. */
#define X(name, dtype, ...) \
b32 beamformer_push_##name (dtype *data, u32 count) { \
	b32 result = beamformer_push_##name ##_at(data, count, 0); \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X
    398 
/* Copies raw RF data into the shared scratch space, de-interleaving channels
 * according to the block's channel mapping, and publishes the (block, size)
 * pair for the beamformer to pick up. Sizes are validated against both the
 * block's beamforming parameters and the raw data dimensions.
 * NOTE(review): the UploadRF lock taken here is not released in this
 * function — presumably the consumer releases it after ingesting the data;
 * if the ScratchSpace lock acquisition fails, UploadRF appears to remain
 * held. Confirm against the beamformer side. */
function b32
beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, u32 block)
{
	b32 result = 0;
	Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
	BeamformerParameterBlock *b  = beamformer_parameter_block(g_beamformer_library_context.bp, block);
	BeamformerParameters     *bp = &b->parameters;
	BeamformerDataKind data_kind = b->pipeline.data_kind;

	/* size: bytes after de-interleaving; raw_size: bytes as transmitted */
	u32 size     = bp->acquisition_count * bp->sample_count * bp->channel_count * beamformer_data_kind_byte_size[data_kind];
	u32 raw_size = bp->raw_data_dimensions.x * bp->raw_data_dimensions.y * beamformer_data_kind_byte_size[data_kind];

	if (lib_error_check(size <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_BUFFER_OVERFLOW) &&
	    lib_error_check(size <= data_size && data_size == raw_size, BF_LIB_ERR_KIND_DATA_SIZE_MISMATCH))
	{
		if (lib_try_lock(BeamformerSharedMemoryLockKind_UploadRF, timeout_ms)) {
			if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
				/* strides are in scalar elements, not bytes (complex kinds
				 * count 2 elements per sample) */
				u32 channel_count      = bp->channel_count;
				u32 out_channel_stride = beamformer_data_kind_element_count[data_kind] * bp->sample_count * bp->acquisition_count;
				u32 in_channel_stride  = beamformer_data_kind_element_count[data_kind] * bp->raw_data_dimensions.x;

				/* gather each output channel from its mapped input channel */
				for (u32 channel = 0; channel < channel_count; channel++) {
					u16 data_channel = (u16)b->channel_mapping[channel];
					u32 out_off = out_channel_stride * channel;
					u32 in_off  = in_channel_stride  * data_channel;
					for (u32 sample = 0; sample < out_channel_stride; sample++, out_off++, in_off++) {
						switch (data_kind) {
						case BeamformerDataKind_Int16:
						case BeamformerDataKind_Int16Complex:
						{
							((i16 *)scratch.beg)[out_off] = ((i16 *)data)[in_off];
						}break;
						case BeamformerDataKind_Float32:
						case BeamformerDataKind_Float32Complex:
						{
							((f32 *)scratch.beg)[out_off] = ((f32 *)data)[in_off];
						}break;
						InvalidDefaultCase;
						}
					}
				}

				lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
				/* TODO(rnp): need a better way to communicate this */
				/* pack block index (high 32 bits) and size (low 32 bits)
				 * into one atomically published word */
				u64 rf_block_rf_size = (u64)block << 32ULL | (u64)size;
				atomic_store_u64(&g_beamformer_library_context.bp->rf_block_rf_size, rf_block_rf_size);
				result = 1;
			}
		}
	}
	return result;
}
    451 
    452 b32
    453 beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, u32 parameter_slot)
    454 {
    455 	b32 result = 0;
    456 	if (check_shared_memory()) {
    457 		u32 reserved_blocks = g_beamformer_library_context.bp->reserved_parameter_blocks;
    458 		if (lib_error_check(image_plane_tag < BeamformerViewPlaneTag_Count, BF_LIB_ERR_KIND_INVALID_IMAGE_PLANE) &&
    459 		    lib_error_check(parameter_slot < reserved_blocks, BF_LIB_ERR_KIND_PARAMETER_BLOCK_UNALLOCATED) &&
    460 		    beamformer_push_data_base(data, data_size, g_beamformer_library_context.timeout_ms, parameter_slot))
    461 		{
    462 			BeamformWork *work = try_push_work_queue();
    463 			if (work) {
    464 				work->kind = BeamformerWorkKind_ComputeIndirect;
    465 				work->compute_indirect_context.view_plane      = image_plane_tag;
    466 				work->compute_indirect_context.parameter_block = parameter_slot;
    467 				beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
    468 				beamformer_flush_commands();
    469 				result = 1;
    470 			}
    471 		}
    472 	}
    473 	return result;
    474 }
    475 
    476 b32
    477 beamformer_push_parameters_at(BeamformerParameters *bp, u32 block)
    478 {
    479 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), block,
    480 	                                           BeamformerParameterBlockRegion_Parameters,
    481 	                                           offsetof(BeamformerParameterBlock, parameters),
    482 	                                           g_beamformer_library_context.timeout_ms);
    483 	return result;
    484 }
    485 
    486 b32
    487 beamformer_push_parameters(BeamformerParameters *bp)
    488 {
    489 	b32 result = beamformer_push_parameters_at(bp, 0);
    490 	return result;
    491 }
    492 
    493 b32
    494 beamformer_push_simple_parameters_at(BeamformerSimpleParameters *bp, u32 block)
    495 {
    496 	b32 result = validate_simple_parameters(bp);
    497 	if (result) {
    498 		alignas(64) v2 focal_vectors[countof(bp->steering_angles)];
    499 		for (u32 i = 0; i < countof(bp->steering_angles); i++)
    500 			focal_vectors[i] = (v2){{bp->steering_angles[i], bp->focal_depths[i]}};
    501 
    502 		result &= beamformer_push_parameters_at((BeamformerParameters *)bp, block);
    503 		result &= beamformer_push_pipeline_at(bp->compute_stages, bp->compute_stages_count, (BeamformerDataKind)bp->data_kind, block);
    504 		result &= beamformer_push_channel_mapping_at(bp->channel_mapping, bp->channel_count, block);
    505 		result &= beamformer_push_focal_vectors_at((f32 *)focal_vectors, countof(focal_vectors), block);
    506 		result &= beamformer_push_transmit_receive_orientations_at(bp->transmit_receive_orientations,
    507 		                                                           bp->acquisition_count, block);
    508 
    509 		if (bp->das_shader_id == BeamformerAcquisitionKind_UFORCES || bp->das_shader_id == BeamformerAcquisitionKind_UHERCULES)
    510 			result &= beamformer_push_sparse_elements_at(bp->sparse_elements, bp->acquisition_count, block);
    511 
    512 		for (u32 stage = 0; stage < bp->compute_stages_count; stage++)
    513 			result &= beamformer_set_pipeline_stage_parameters_at(stage, bp->compute_stage_parameters[stage], block);
    514 	}
    515 	return result;
    516 }
    517 
    518 b32
    519 beamformer_push_simple_parameters(BeamformerSimpleParameters *bp)
    520 {
    521 	b32 result = beamformer_push_simple_parameters_at(bp, 0);
    522 	return result;
    523 }
    524 
    525 b32
    526 beamformer_push_parameters_ui(BeamformerUIParameters *bp)
    527 {
    528 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
    529 	                                           offsetof(BeamformerParameterBlock, parameters_ui),
    530 	                                           g_beamformer_library_context.timeout_ms);
    531 	return result;
    532 }
    533 
    534 b32
    535 beamformer_push_parameters_head(BeamformerParametersHead *bp)
    536 {
    537 	b32 result = parameter_block_region_upload(bp, sizeof(*bp), 0, BeamformerParameterBlockRegion_Parameters,
    538 	                                           offsetof(BeamformerParameterBlock, parameters_head),
    539 	                                           g_beamformer_library_context.timeout_ms);
    540 	return result;
    541 }
    542 
/* Queues an ExportBuffer work item and takes the ExportSync lock, which the
 * beamformer releases once the export data is in scratch space (see
 * beamformer_export). NOTE(review): when the work slot is obtained but the
 * ExportSync lock fails, the slot is never committed — presumably uncommitted
 * pushes are simply reused; confirm with beamform_work_queue semantics. */
function b32
beamformer_export_buffer(BeamformerExportContext export_context)
{
	BeamformWork *work = try_push_work_queue();
	b32 result = work && lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, 0);
	if (result) {
		work->export_context = export_context;
		work->kind = BeamformerWorkKind_ExportBuffer;
		work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
		beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue);
	}
	return result;
}
    556 
/* Queues an export, waits (up to timeout_ms) for the beamformer to signal
 * completion via ExportSync, then copies export.size bytes out of the shared
 * scratch space into `out` under the ScratchSpace lock. */
function b32
beamformer_export(BeamformerExportContext export, void *out, i32 timeout_ms)
{
	b32 result = 0;
	if (beamformer_export_buffer(export)) {
		/* NOTE(rnp): if this fails it just means that the work from push_data hasn't
		 * started yet. This is here to catch the other case where the work started
		 * and finished before we finished queuing the export work item */
		beamformer_flush_commands();

		/* ExportSync is held by beamformer_export_buffer; reacquiring it
		 * here blocks until the beamformer releases it on completion */
		if (lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
			if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
				Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
				mem_copy(out, scratch.beg, export.size);
				lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
				result = 1;
			}
			lib_release_lock(BeamformerSharedMemoryLockKind_ExportSync);
		}
	}
	return result;
}
    579 
/* One-shot synchronous beamforming: pushes parameters and data, dispatches
 * compute, and copies the beamformed volume into out_data. Output size is
 * output_points.x * y * z floats (doubled when any stage produces complex
 * samples). */
b32
beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t data_size,
                         void *out_data, int32_t timeout_ms)
{
	b32 result = validate_simple_parameters(bp);
	if (result) {
		/* clamp degenerate output dimensions to 1 so the size math holds */
		bp->output_points.E[0] = MAX(1, bp->output_points.E[0]);
		bp->output_points.E[1] = MAX(1, bp->output_points.E[1]);
		bp->output_points.E[2] = MAX(1, bp->output_points.E[2]);

		/* NOTE(review): return value ignored — a failed parameter upload
		 * is only caught later when push/export fails; confirm intended */
		beamformer_push_simple_parameters(bp);

		/* complex output doubles the export size (IQ pairs) */
		b32 complex = 0;
		for (u32 stage = 0; stage < bp->compute_stages_count; stage++) {
			BeamformerShaderKind shader = (BeamformerShaderKind)bp->compute_stages[stage];
			complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_CudaHilbert;
		}

		iz output_size = bp->output_points.x * bp->output_points.y * bp->output_points.z * (i32)sizeof(f32);
		if (complex) output_size *= 2;

		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
		/* NOTE(review): `result &&` is redundant here (result is known true) */
		if (result && lib_error_check(output_size <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW)
		    && beamformer_push_data_with_compute(data, data_size, 0, 0))
		{
			BeamformerExportContext export;
			export.kind = BeamformerExportKind_BeamformedData;
			export.size = (u32)output_size;
			result = beamformer_export(export, out_data, timeout_ms);
		}
	}
	return result;
}
    613 
    614 b32
    615 beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
    616 {
    617 	static_assert(sizeof(*output) <= BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE,
    618 	              "timing table size exceeds scratch space");
    619 
    620 	b32 result = 0;
    621 	if (check_shared_memory()) {
    622 		Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp);
    623 		if (lib_error_check((iz)sizeof(*output) <= arena_capacity(&scratch, u8), BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW)) {
    624 			BeamformerExportContext export;
    625 			export.kind = BeamformerExportKind_Stats;
    626 			export.size = sizeof(*output);
    627 			result = beamformer_export(export, output, timeout_ms);
    628 		}
    629 	}
    630 	return result;
    631 }
    632 
    633 i32
    634 beamformer_live_parameters_get_dirty_flag(void)
    635 {
    636 	i32 result = -1;
    637 	if (check_shared_memory()) {
    638 		u32 flag = ctz_u32(g_beamformer_library_context.bp->live_imaging_dirty_flags);
    639 		if (flag != 32) {
    640 			atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1u << flag));
    641 			result = (i32)flag;
    642 		}
    643 	}
    644 	return result;
    645 }
    646 
    647 BeamformerLiveImagingParameters *
    648 beamformer_get_live_parameters(void)
    649 {
    650 	BeamformerLiveImagingParameters *result = 0;
    651 	if (check_shared_memory()) result = &g_beamformer_library_context.bp->live_imaging_parameters;
    652 	return result;
    653 }
    654 
    655 b32
    656 beamformer_set_live_parameters(BeamformerLiveImagingParameters *new)
    657 {
    658 	b32 result = 0;
    659 	if (check_shared_memory()) {
    660 		mem_copy(&g_beamformer_library_context.bp->live_imaging_parameters, new, sizeof(*new));
    661 		memory_write_barrier();
    662 		result = 1;
    663 	}
    664 	return result;
    665 }