ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

ogl_beamformer_lib.c (13989B)


      1 /* See LICENSE for license details. */
      2 #include "../compiler.h"
      3 
      4 #include "../util.h"
      5 #include "../beamformer_parameters.h"
      6 #include "ogl_beamformer_lib_base.h"
      7 #include "../beamformer_work_queue.c"
      8 
      9 global SharedMemoryRegion      g_shared_memory;
     10 global BeamformerSharedMemory *g_bp;
     11 global BeamformerLibErrorKind  g_lib_last_error;
     12 
     13 #if OS_LINUX
     14 #include "../os_linux.c"
     15 #elif OS_WINDOWS
     16 #include "../os_win32.c"
     17 
     18 W32(iptr) OpenFileMappingA(u32, b32, c8 *);
     19 
     20 #else
     21 #error Unsupported Platform
     22 #endif
     23 
     24 #if OS_LINUX
     25 
     26 function SharedMemoryRegion
     27 os_open_shared_memory_area(char *name)
     28 {
     29 	SharedMemoryRegion result = {0};
     30 	i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR);
     31 	if (fd > 0) {
     32 		void *new = mmap(0, BEAMFORMER_SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
     33 		if (new != MAP_FAILED) result.region = new;
     34 		close(fd);
     35 	}
     36 	return result;
     37 }
     38 
     39 #elif OS_WINDOWS
     40 
/* Opens an existing named file mapping holding the beamformer shared memory
 * and creates/opens one named semaphore per shared-memory lock kind for
 * cross-process synchronization. Returns a zeroed region on failure. */
function SharedMemoryRegion
os_open_shared_memory_area(char *name)
{
	SharedMemoryRegion result = {0};
	iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name);
	/* NOTE(review): Win32 documents OpenFileMappingA() as returning NULL on
	 * failure (not INVALID_HANDLE_VALUE) — confirm what INVALID_FILE expands
	 * to in os_win32.c */
	if (h != INVALID_FILE) {
		void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, BEAMFORMER_SHARED_MEMORY_SIZE);
		if (new) {
			/* build semaphore names of the form "<name>_lock_<index>" */
			u8 buffer[1024];
			Stream sb = {.data = buffer, .cap = 1024};
			stream_append_s8s(&sb, c_str_to_s8(name), s8("_lock_"));
			/* local_persist: result.os_context points at ctx, so the handles
			 * and context must outlive this call */
			local_persist iptr semaphores[BeamformerSharedMemoryLockKind_Count];
			local_persist w32_shared_memory_context ctx = {.semaphores = semaphores};
			b32 all_semaphores = 1;
			for (i32 i = 0; i < countof(semaphores); i++) {
				Stream lb = sb;
				stream_append_i64(&lb, i);
				stream_append_byte(&lb, 0);
				semaphores[i] = CreateSemaphoreA(0, 1, 1, (c8 *)lb.data);
				/* NOTE(review): CreateSemaphoreA() returns NULL on failure per
				 * Win32 docs; comparing with INVALID_FILE may miss errors */
				all_semaphores &= semaphores[i] != INVALID_FILE;
			}
			if (all_semaphores) {
				result.region     = new;
				result.os_context = (iptr)&ctx;
			}
		}
		/* the mapped view stays valid after the mapping handle is closed */
		CloseHandle(h);
	}
	return result;
}
     71 
     72 #endif
     73 
     74 function b32
     75 check_shared_memory(void)
     76 {
     77 	b32 result = 1;
     78 	if (!g_shared_memory.region) {
     79 		g_shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME);
     80 		if (!g_shared_memory.region) {
     81 			g_lib_last_error = BF_LIB_ERR_KIND_SHARED_MEMORY;
     82 			result = 0;
     83 		} else if (((BeamformerSharedMemory *)g_shared_memory.region)->version !=
     84 		           BEAMFORMER_SHARED_MEMORY_VERSION)
     85 		{
     86 			g_lib_last_error = BF_LIB_ERR_KIND_VERSION_MISMATCH;
     87 			result = 0;
     88 		}
     89 	}
     90 	if (result && ((BeamformerSharedMemory *)g_shared_memory.region)->invalid) {
     91 		g_lib_last_error = BF_LIB_ERR_KIND_INVALID_ACCESS;
     92 		result = 0;
     93 	}
     94 	if (result) g_bp = g_shared_memory.region;
     95 	return result;
     96 }
     97 
     98 function BeamformWork *
     99 try_push_work_queue(void)
    100 {
    101 	BeamformWork *result = beamform_work_queue_push(&g_bp->external_work_queue);
    102 	if (!result) g_lib_last_error = BF_LIB_ERR_KIND_WORK_QUEUE_FULL;
    103 	return result;
    104 }
    105 
    106 function b32
    107 lib_try_lock(BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
    108 {
    109 	b32 result = os_shared_memory_region_lock(&g_shared_memory, g_bp->locks, (i32)lock, timeout_ms);
    110 	if (!result) g_lib_last_error = BF_LIB_ERR_KIND_SYNC_VARIABLE;
    111 	return result;
    112 }
    113 
/* Releases a shared-memory lock previously acquired with lib_try_lock(). */
function void
lib_release_lock(BeamformerSharedMemoryLockKind lock)
{
	os_shared_memory_region_unlock(&g_shared_memory, g_bp->locks, (i32)lock);
}
    119 
/* Synchronization pulse, not an acquisition: the first try_lock (timeout 0)
 * presumably grabs the lock when free, the second then blocks up to
 * timeout_ms until the other side cycles it — confirm against the
 * os_shared_memory_region_lock() semantics. On success the lock is released
 * again immediately. */
function b32
try_wait_sync(BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
{
	b32 result = lib_try_lock(lock, 0) && lib_try_lock(lock, timeout_ms);
	/* TODO(rnp): non-critical race condition */
	if (result) lib_release_lock(lock);
	return result;
}
    128 
    129 u32
    130 beamformer_get_api_version(void)
    131 {
    132 	return BEAMFORMER_SHARED_MEMORY_VERSION;
    133 }
    134 
    135 const char *
    136 beamformer_error_string(BeamformerLibErrorKind kind)
    137 {
    138 	#define X(type, num, string) string,
    139 	local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"};
    140 	#undef X
    141 	return error_string_table[MIN(kind, countof(error_string_table) - 1)];
    142 }
    143 
    144 BeamformerLibErrorKind
    145 beamformer_get_last_error(void)
    146 {
    147 	return g_lib_last_error;
    148 }
    149 
    150 const char *
    151 beamformer_get_last_error_string(void)
    152 {
    153 	return beamformer_error_string(beamformer_get_last_error());
    154 }
    155 
    156 b32
    157 set_beamformer_pipeline(i32 *stages, i32 stages_count)
    158 {
    159 	b32 result = 0;
    160 	if (stages_count <= countof(g_bp->compute_stages)) {
    161 		if (check_shared_memory()) {
    162 			g_bp->compute_stages_count = 0;
    163 			for (i32 i = 0; i < stages_count; i++) {
    164 				if (BETWEEN(stages[i], 0, BeamformerShaderKind_ComputeCount)) {
    165 					g_bp->compute_stages[g_bp->compute_stages_count++] = stages[i];
    166 				}
    167 			}
    168 			result = g_bp->compute_stages_count == stages_count;
    169 			if (!result) {
    170 				g_lib_last_error = BF_LIB_ERR_KIND_INVALID_COMPUTE_STAGE;
    171 				g_bp->compute_stages_count = 0;
    172 			}
    173 		}
    174 	} else {
    175 		g_lib_last_error = BF_LIB_ERR_KIND_COMPUTE_STAGE_OVERFLOW;
    176 	}
    177 	return result;
    178 }
    179 
    180 b32
    181 beamformer_start_compute(i32 timeout_ms)
    182 {
    183 	i32 lock   = BeamformerSharedMemoryLockKind_DispatchCompute;
    184 	b32 result = check_shared_memory() && lib_try_lock(lock, timeout_ms);
    185 	return result;
    186 }
    187 
    188 b32
    189 beamformer_wait_for_compute_dispatch(i32 timeout_ms)
    190 {
    191 	i32 lock   = BeamformerSharedMemoryLockKind_DispatchCompute;
    192 	b32 result = check_shared_memory() && lib_try_lock(lock, timeout_ms);
    193 	/* NOTE(rnp): if you are calling this function you are probably about
    194 	 * to start some other work and it might be better to not do this... */
    195 	if (result) lib_release_lock(BeamformerSharedMemoryLockKind_DispatchCompute);
    196 	return result;
    197 }
    198 
/* Copies `size` bytes of `data` into shared memory at `store_offset` and
 * queues an UploadBuffer work item so the beamformer re-uploads the region to
 * the GPU. `lock` guards the destination region for the duration of the copy.
 * Returns 1 on success; g_lib_last_error identifies the failure otherwise. */
function b32
beamformer_upload_buffer(void *data, u32 size, i32 store_offset, BeamformerUploadContext upload_context,
                         BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
{
	b32 result = 0;
	if (check_shared_memory()) {
		BeamformWork *work = try_push_work_queue();
		result = work && lib_try_lock(lock, timeout_ms);
		if (result) {
			work->upload_context = upload_context;
			work->kind = BeamformerWorkKind_UploadBuffer;
			work->lock = lock;
			mem_copy((u8 *)g_bp + store_offset, data, size);
			/* commit only when this region's dirty bit was clear: if it is
			 * already set, a previously queued upload for the region is still
			 * pending and will pick up the freshly copied data.
			 * NOTE(review): the bit index assumes lock >= 1 — confirm lock
			 * kind 0 never reaches this path */
			if ((atomic_load_u32(&g_bp->dirty_regions) & (1 << (lock - 1))) == 0) {
				atomic_or_u32(&g_bp->dirty_regions, (1 << (lock - 1)));
				beamform_work_queue_push_commit(&g_bp->external_work_queue);
			}
			lib_release_lock(lock);
		}
	}
	return result;
}
    221 
/* Table of typed upload entry points:
 *   X(name, element type, components per element, lock suffix, command suffix)
 * Each row expands to a beamformer_push_<name>() function. */
#define BEAMFORMER_UPLOAD_FNS \
	X(channel_mapping, i16, 1, ChannelMapping, CHANNEL_MAPPING) \
	X(sparse_elements, i16, 1, SparseElements, SPARSE_ELEMENTS) \
	X(focal_vectors,   f32, 2, FocalVectors,   FOCAL_VECTORS)

/* Generator: bounds-checks `count` against the matching shared-memory array
 * and forwards to beamformer_upload_buffer(). Returns 1 on success; sets
 * BF_LIB_ERR_KIND_BUFFER_OVERFLOW when count exceeds the destination. */
#define X(name, dtype, elements, lock_name, command) \
b32 beamformer_push_##name (dtype *data, u32 count, i32 timeout_ms) { \
	b32 result = 0; \
	if (count <= countof(g_bp->name)) { \
		BeamformerUploadContext uc = {0}; \
		uc.shared_memory_offset = offsetof(BeamformerSharedMemory, name); \
		uc.kind = BU_KIND_##command; \
		uc.size = count * elements * sizeof(dtype); \
		result = beamformer_upload_buffer(data, uc.size, uc.shared_memory_offset, uc, \
		                                  BeamformerSharedMemoryLockKind_##lock_name, timeout_ms); \
	} else { \
		g_lib_last_error = BF_LIB_ERR_KIND_BUFFER_OVERFLOW; \
	} \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X
    244 
    245 function b32
    246 beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, b32 start_from_main)
    247 {
    248 	b32 result = 0;
    249 	if (data_size <= BEAMFORMER_MAX_RF_DATA_SIZE) {
    250 		BeamformerUploadContext uc = {0};
    251 		uc.shared_memory_offset = BEAMFORMER_SCRATCH_OFF;
    252 		uc.size = data_size;
    253 		uc.kind = BU_KIND_RF_DATA;
    254 		result = beamformer_upload_buffer(data, data_size, uc.shared_memory_offset, uc,
    255 		                                  BeamformerSharedMemoryLockKind_ScratchSpace, timeout_ms);
    256 		if (result && start_from_main) atomic_store_u32(&g_bp->start_compute_from_main, 1);
    257 	} else {
    258 		g_lib_last_error = BF_LIB_ERR_KIND_BUFFER_OVERFLOW;
    259 	}
    260 	return result;
    261 }
    262 
    263 b32
    264 beamformer_push_data(void *data, u32 data_size, i32 timeout_ms)
    265 {
    266 	return beamformer_push_data_base(data, data_size, timeout_ms, 1);
    267 }
    268 
    269 b32
    270 beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, i32 timeout_ms)
    271 {
    272 	b32 result = beamformer_push_data_base(data, data_size, timeout_ms, 0);
    273 	if (result) {
    274 		result = image_plane_tag < BeamformerViewPlaneTag_Count;
    275 		if (result) {
    276 			BeamformWork *work = try_push_work_queue();
    277 			if (work) {
    278 				work->kind = BeamformerWorkKind_ComputeIndirect;
    279 				work->compute_indirect_plane = image_plane_tag;
    280 				beamform_work_queue_push_commit(&g_bp->external_work_queue);
    281 				result = beamformer_start_compute(0);
    282 			}
    283 		} else {
    284 			g_lib_last_error = BF_LIB_ERR_KIND_INVALID_IMAGE_PLANE;
    285 		}
    286 	}
    287 	return result;
    288 }
    289 
    290 b32
    291 beamformer_push_parameters(BeamformerParameters *bp, i32 timeout_ms)
    292 {
    293 	BeamformerUploadContext uc = {0};
    294 	uc.shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
    295 	uc.size = sizeof(g_bp->parameters);
    296 	uc.kind = BU_KIND_PARAMETERS;
    297 	b32 result = beamformer_upload_buffer(bp, sizeof(*bp),
    298 	                                      offsetof(BeamformerSharedMemory, parameters), uc,
    299 	                                      BeamformerSharedMemoryLockKind_Parameters, timeout_ms);
    300 	return result;
    301 }
    302 
/* Copies only the UI subset of the parameters (stored at parameters_ui).
 * NOTE(review): the upload context deliberately(?) spans the whole parameter
 * block (offset of `parameters`, full size) while the copy targets
 * `parameters_ui` — presumably the GPU re-uploads the entire block; confirm
 * against the BU_KIND_PARAMETERS consumer. */
b32
beamformer_push_parameters_ui(BeamformerUIParameters *bp, i32 timeout_ms)
{
	BeamformerUploadContext uc = {0};
	uc.shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
	uc.size = sizeof(g_bp->parameters);
	uc.kind = BU_KIND_PARAMETERS;
	b32 result = beamformer_upload_buffer(bp, sizeof(*bp),
	                                      offsetof(BeamformerSharedMemory, parameters_ui), uc,
	                                      BeamformerSharedMemoryLockKind_Parameters, timeout_ms);
	return result;
}
    315 
/* Copies only the head subset of the parameters (stored at parameters_head).
 * NOTE(review): as with the _ui variant, the upload context covers the full
 * parameter block while the copy targets `parameters_head` — presumably
 * intentional; confirm against the BU_KIND_PARAMETERS consumer. */
b32
beamformer_push_parameters_head(BeamformerParametersHead *bp, i32 timeout_ms)
{
	BeamformerUploadContext uc = {0};
	uc.shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
	uc.size = sizeof(g_bp->parameters);
	uc.kind = BU_KIND_PARAMETERS;
	b32 result = beamformer_upload_buffer(bp, sizeof(*bp),
	                                      offsetof(BeamformerSharedMemory, parameters_head), uc,
	                                      BeamformerSharedMemoryLockKind_Parameters, timeout_ms);
	return result;
}
    328 
    329 b32
    330 set_beamformer_parameters(BeamformerParametersV0 *new_bp)
    331 {
    332 	b32 result = 1;
    333 	result &= beamformer_push_channel_mapping((i16 *)new_bp->channel_mapping,
    334 	                                          countof(new_bp->channel_mapping), 0);
    335 	result &= beamformer_push_sparse_elements((i16 *)new_bp->uforces_channels,
    336 	                                          countof(new_bp->uforces_channels), 0);
    337 	v2 focal_vectors[256];
    338 	for (u32 i = 0; i < countof(focal_vectors); i++)
    339 		focal_vectors[i] = (v2){{new_bp->transmit_angles[i], new_bp->focal_depths[i]}};
    340 	result &= beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors), 0);
    341 	result &= beamformer_push_parameters((BeamformerParameters *)&new_bp->xdc_transform, 0);
    342 	return result;
    343 }
    344 
    345 b32
    346 send_data(void *data, u32 data_size)
    347 {
    348 	b32 result = 0;
    349 	if (beamformer_push_data(data, data_size, 0))
    350 		result = beamformer_start_compute(-1);
    351 	return result;
    352 }
    353 
    354 function b32
    355 beamformer_export_buffer(BeamformerExportContext export_context)
    356 {
    357 	BeamformWork *work = try_push_work_queue();
    358 	b32 result = work != 0;
    359 	if (result) {
    360 		work->export_context = export_context;
    361 		work->kind = BeamformerWorkKind_ExportBuffer;
    362 		work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
    363 		beamform_work_queue_push_commit(&g_bp->external_work_queue);
    364 	}
    365 	return result;
    366 }
    367 
    368 function b32
    369 beamformer_read_output(void *out, iz size, i32 timeout_ms)
    370 {
    371 	b32 result = 0;
    372 	if (try_wait_sync(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
    373 		if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
    374 			mem_copy(out, (u8 *)g_bp + BEAMFORMER_SCRATCH_OFF, size);
    375 			lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
    376 			result = 1;
    377 		}
    378 	}
    379 	return result;
    380 }
    381 
    382 b32
    383 beamform_data_synchronized(void *data, u32 data_size, u32 output_points[3], f32 *out_data, i32 timeout_ms)
    384 {
    385 	b32 result = 0;
    386 	if (check_shared_memory()) {
    387 		output_points[0] = MAX(1, output_points[0]);
    388 		output_points[1] = MAX(1, output_points[1]);
    389 		output_points[2] = MAX(1, output_points[2]);
    390 
    391 		g_bp->parameters.output_points[0] = output_points[0];
    392 		g_bp->parameters.output_points[1] = output_points[1];
    393 		g_bp->parameters.output_points[2] = output_points[2];
    394 
    395 		iz output_size = output_points[0] * output_points[1] * output_points[2] * sizeof(f32) * 2;
    396 		if (output_size <= BEAMFORMER_SCRATCH_SIZE &&
    397 		    beamformer_push_data_with_compute(data, data_size, 0, 0))
    398 		{
    399 			BeamformerExportContext export;
    400 			export.kind = BeamformerExportKind_BeamformedData;
    401 			export.size = output_size;
    402 			if (beamformer_export_buffer(export) && beamformer_start_compute(0))
    403 				result = beamformer_read_output(out_data, output_size, timeout_ms);
    404 		} else {
    405 			g_lib_last_error = BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW;
    406 		}
    407 	}
    408 	return result;
    409 }
    410 
    411 b32
    412 beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
    413 {
    414 	b32 result = 0;
    415 	if (check_shared_memory()) {
    416 		static_assert(sizeof(*output) <= BEAMFORMER_SCRATCH_SIZE, "timing table size exceeds scratch space");
    417 		BeamformerExportContext export;
    418 		export.kind = BeamformerExportKind_Stats;
    419 		export.size = sizeof(*output);
    420 		if (beamformer_export_buffer(export) && beamformer_start_compute(0))
    421 			result = beamformer_read_output(output, sizeof(*output), timeout_ms);
    422 	}
    423 	return result;
    424 }
    425 
    426 i32
    427 beamformer_live_parameters_get_dirty_flag(void)
    428 {
    429 	i32 result = -1;
    430 	if (check_shared_memory()) {
    431 		u32 flag = ctz_u32(g_bp->live_imaging_dirty_flags);
    432 		if (flag != 32) {
    433 			atomic_and_u32(&g_bp->live_imaging_dirty_flags, ~(1 << flag));
    434 			result = flag;
    435 		}
    436 	}
    437 	return result;
    438 }
    439 
    440 BeamformerLiveImagingParameters *
    441 beamformer_get_live_parameters(void)
    442 {
    443 	BeamformerLiveImagingParameters *result = 0;
    444 	if (check_shared_memory()) result = &g_bp->live_imaging_parameters;
    445 	return result;
    446 }
    447 
    448 b32
    449 beamformer_set_live_parameters(BeamformerLiveImagingParameters *new)
    450 {
    451 	b32 result = 0;
    452 	if (check_shared_memory()) {
    453 		mem_copy(&g_bp->live_imaging_parameters, new, sizeof(*new));
    454 		memory_write_barrier();
    455 		result = 1;
    456 	}
    457 	return result;
    458 }