ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

ogl_beamformer_lib.c (12948B)


      1 /* See LICENSE for license details. */
      2 #include "../compiler.h"
      3 
      4 #include "../util.h"
      5 #include "../beamformer_parameters.h"
      6 #include "ogl_beamformer_lib_base.h"
      7 #include "../beamformer_work_queue.c"
      8 
/* Handle for the beamformer's shared memory area; opened on demand by check_shared_memory() */
global SharedMemoryRegion      g_shared_memory;
/* Typed view of g_shared_memory.region; only assigned when check_shared_memory() succeeds */
global BeamformerSharedMemory *g_bp;
/* Most recent error recorded by the library; returned by beamformer_get_last_error() */
global BeamformerLibErrorKind  g_lib_last_error;
     12 
     13 #if OS_LINUX
     14 #include "../os_linux.c"
     15 #elif OS_WINDOWS
     16 #include "../os_win32.c"
     17 
     18 W32(iptr) OpenFileMappingA(u32, b32, c8 *);
     19 
     20 #else
     21 #error Unsupported Platform
     22 #endif
     23 
     24 #if OS_LINUX
     25 
     26 function SharedMemoryRegion
     27 os_open_shared_memory_area(char *name)
     28 {
     29 	SharedMemoryRegion result = {0};
     30 	i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR);
     31 	if (fd > 0) {
     32 		void *new = mmap(0, BEAMFORMER_SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
     33 		if (new != MAP_FAILED) result.region = new;
     34 		close(fd);
     35 	}
     36 	return result;
     37 }
     38 
     39 #elif OS_WINDOWS
     40 
     41 function SharedMemoryRegion
     42 os_open_shared_memory_area(char *name)
     43 {
     44 	SharedMemoryRegion result = {0};
     45 	iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name);
     46 	if (h != INVALID_FILE) {
     47 		void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0,
     48 		                          os_round_up_to_page_size(BEAMFORMER_SHARED_MEMORY_SIZE));
     49 		if (new) {
     50 			u8 buffer[1024];
     51 			Stream sb = {.data = buffer, .cap = 1024};
     52 			stream_append_s8s(&sb, c_str_to_s8(name), s8("_lock_"));
     53 			local_persist iptr semaphores[BeamformerSharedMemoryLockKind_Count];
     54 			local_persist w32_shared_memory_context ctx = {.semaphores = semaphores};
     55 			b32 all_semaphores = 1;
     56 			for (i32 i = 0; i < countof(semaphores); i++) {
     57 				Stream lb = sb;
     58 				stream_append_i64(&lb, i);
     59 				stream_append_byte(&lb, 0);
     60 				semaphores[i] = CreateSemaphoreA(0, 1, 1, (c8 *)lb.data);
     61 				all_semaphores &= semaphores[i] != INVALID_FILE;
     62 			}
     63 			if (all_semaphores) {
     64 				result.region     = new;
     65 				result.os_context = (iptr)&ctx;
     66 			}
     67 		}
     68 		CloseHandle(h);
     69 	}
     70 	return result;
     71 }
     72 
     73 #endif
     74 
     75 function b32
     76 check_shared_memory(void)
     77 {
     78 	b32 result = 1;
     79 	if (!g_shared_memory.region) {
     80 		g_shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME);
     81 		if (!g_shared_memory.region) {
     82 			g_lib_last_error = BF_LIB_ERR_KIND_SHARED_MEMORY;
     83 			result = 0;
     84 		} else if (((BeamformerSharedMemory *)g_shared_memory.region)->version !=
     85 		           BEAMFORMER_SHARED_MEMORY_VERSION)
     86 		{
     87 			g_lib_last_error = BF_LIB_ERR_KIND_VERSION_MISMATCH;
     88 			result = 0;
     89 		}
     90 	}
     91 	if (result && ((BeamformerSharedMemory *)g_shared_memory.region)->invalid) {
     92 		g_lib_last_error = BF_LIB_ERR_KIND_INVALID_ACCESS;
     93 		result = 0;
     94 	}
     95 	if (result) g_bp = g_shared_memory.region;
     96 	return result;
     97 }
     98 
     99 function BeamformWork *
    100 try_push_work_queue(void)
    101 {
    102 	BeamformWork *result = beamform_work_queue_push(&g_bp->external_work_queue);
    103 	if (!result) g_lib_last_error = BF_LIB_ERR_KIND_WORK_QUEUE_FULL;
    104 	return result;
    105 }
    106 
    107 function b32
    108 lib_try_lock(BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
    109 {
    110 	b32 result = os_shared_memory_region_lock(&g_shared_memory, g_bp->locks, (i32)lock, timeout_ms);
    111 	if (!result) g_lib_last_error = BF_LIB_ERR_KIND_SYNC_VARIABLE;
    112 	return result;
    113 }
    114 
    115 function void
    116 lib_release_lock(BeamformerSharedMemoryLockKind lock)
    117 {
    118 	os_shared_memory_region_unlock(&g_shared_memory, g_bp->locks, (i32)lock);
    119 }
    120 
    121 function b32
    122 try_wait_sync(BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
    123 {
    124 	b32 result = 0;
    125 	if (lib_try_lock(lock, 0) && lib_try_lock(lock, timeout_ms)) {
    126 		/* TODO(rnp): non-critical race condition */
    127 		lib_release_lock(lock);
    128 		result = 1;
    129 	}
    130 	return result;
    131 }
    132 
    133 u32
    134 beamformer_get_api_version(void)
    135 {
    136 	return BEAMFORMER_SHARED_MEMORY_VERSION;
    137 }
    138 
    139 const char *
    140 beamformer_error_string(BeamformerLibErrorKind kind)
    141 {
    142 	#define X(type, num, string) string,
    143 	local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"};
    144 	#undef X
    145 	return error_string_table[MIN(kind, countof(error_string_table) - 1)];
    146 }
    147 
    148 BeamformerLibErrorKind
    149 beamformer_get_last_error(void)
    150 {
    151 	return g_lib_last_error;
    152 }
    153 
    154 const char *
    155 beamformer_get_last_error_string(void)
    156 {
    157 	return beamformer_error_string(beamformer_get_last_error());
    158 }
    159 
    160 b32
    161 set_beamformer_pipeline(i32 *stages, i32 stages_count)
    162 {
    163 	b32 result = 0;
    164 	if (stages_count <= countof(g_bp->compute_stages)) {
    165 		if (check_shared_memory()) {
    166 			g_bp->compute_stages_count = 0;
    167 			for (i32 i = 0; i < stages_count; i++) {
    168 				if (BETWEEN(stages[i], 0, BeamformerShaderKind_ComputeCount)) {
    169 					g_bp->compute_stages[g_bp->compute_stages_count++] = stages[i];
    170 				}
    171 			}
    172 			result = g_bp->compute_stages_count == stages_count;
    173 			if (!result) {
    174 				g_lib_last_error = BF_LIB_ERR_KIND_INVALID_COMPUTE_STAGE;
    175 				g_bp->compute_stages_count = 0;
    176 			}
    177 		}
    178 	} else {
    179 		g_lib_last_error = BF_LIB_ERR_KIND_COMPUTE_STAGE_OVERFLOW;
    180 	}
    181 	return result;
    182 }
    183 
    184 b32
    185 beamformer_start_compute(i32 timeout_ms)
    186 {
    187 	b32 result = check_shared_memory() &&
    188 	             try_wait_sync(BeamformerSharedMemoryLockKind_DispatchCompute, timeout_ms);
    189 	return result;
    190 }
    191 
    192 function b32
    193 beamformer_upload_buffer(void *data, u32 size, i32 store_offset, BeamformerUploadContext upload_context,
    194                          BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
    195 {
    196 	b32 result = 0;
    197 	if (check_shared_memory()) {
    198 		BeamformWork *work = try_push_work_queue();
    199 		result = work && lib_try_lock(lock, timeout_ms);
    200 		if (result) {
    201 			work->upload_context = upload_context;
    202 			work->kind = BeamformerWorkKind_UploadBuffer;
    203 			work->lock = lock;
    204 			mem_copy((u8 *)g_bp + store_offset, data, size);
    205 			if ((atomic_load_u32(&g_bp->dirty_regions) & (1 << (lock - 1))) == 0) {
    206 				atomic_or_u32(&g_bp->dirty_regions, (1 << (lock - 1)));
    207 				beamform_work_queue_push_commit(&g_bp->external_work_queue);
    208 			}
    209 			lib_release_lock(lock);
    210 		}
    211 	}
    212 	return result;
    213 }
    214 
    215 #define BEAMFORMER_UPLOAD_FNS \
    216 	X(channel_mapping, i16, 1, ChannelMapping, CHANNEL_MAPPING) \
    217 	X(sparse_elements, i16, 1, SparseElements, SPARSE_ELEMENTS) \
    218 	X(focal_vectors,   f32, 2, FocalVectors,   FOCAL_VECTORS)
    219 
    220 #define X(name, dtype, elements, lock_name, command) \
    221 b32 beamformer_push_##name (dtype *data, u32 count, i32 timeout_ms) { \
    222 	b32 result = 0; \
    223 	if (count <= countof(g_bp->name)) { \
    224 		BeamformerUploadContext uc = {0}; \
    225 		uc.shared_memory_offset = offsetof(BeamformerSharedMemory, name); \
    226 		uc.kind = BU_KIND_##command; \
    227 		uc.size = count * elements * sizeof(dtype); \
    228 		result = beamformer_upload_buffer(data, uc.size, uc.shared_memory_offset, uc, \
    229 		                                  BeamformerSharedMemoryLockKind_##lock_name, timeout_ms); \
    230 	} else { \
    231 		g_lib_last_error = BF_LIB_ERR_KIND_BUFFER_OVERFLOW; \
    232 	} \
    233 	return result; \
    234 }
    235 BEAMFORMER_UPLOAD_FNS
    236 #undef X
    237 
    238 function b32
    239 beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, b32 start_from_main)
    240 {
    241 	b32 result = 0;
    242 	if (data_size <= BEAMFORMER_MAX_RF_DATA_SIZE) {
    243 		BeamformerUploadContext uc = {0};
    244 		uc.shared_memory_offset = BEAMFORMER_SCRATCH_OFF;
    245 		uc.size = data_size;
    246 		uc.kind = BU_KIND_RF_DATA;
    247 		result = beamformer_upload_buffer(data, data_size, uc.shared_memory_offset, uc,
    248 		                                  BeamformerSharedMemoryLockKind_ScratchSpace, timeout_ms);
    249 		if (result && start_from_main) atomic_store_u32(&g_bp->start_compute_from_main, 1);
    250 	} else {
    251 		g_lib_last_error = BF_LIB_ERR_KIND_BUFFER_OVERFLOW;
    252 	}
    253 	return result;
    254 }
    255 
    256 b32
    257 beamformer_push_data(void *data, u32 data_size, i32 timeout_ms)
    258 {
    259 	return beamformer_push_data_base(data, data_size, timeout_ms, 1);
    260 }
    261 
    262 b32
    263 beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, i32 timeout_ms)
    264 {
    265 	b32 result = beamformer_push_data_base(data, data_size, timeout_ms, 0);
    266 	if (result) {
    267 		result = image_plane_tag < IPT_LAST;
    268 		if (result) {
    269 			BeamformWork *work = try_push_work_queue();
    270 			result = work != 0;
    271 			if (result) {
    272 				work->kind = BeamformerWorkKind_ComputeIndirect;
    273 				work->compute_indirect_plane = image_plane_tag;
    274 				beamform_work_queue_push_commit(&g_bp->external_work_queue);
    275 			}
    276 		} else {
    277 			g_lib_last_error = BF_LIB_ERR_KIND_INVALID_IMAGE_PLANE;
    278 		}
    279 	}
    280 	return result;
    281 }
    282 
    283 b32
    284 beamformer_push_parameters(BeamformerParameters *bp, i32 timeout_ms)
    285 {
    286 	BeamformerUploadContext uc = {0};
    287 	uc.shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
    288 	uc.size = sizeof(g_bp->parameters);
    289 	uc.kind = BU_KIND_PARAMETERS;
    290 	b32 result = beamformer_upload_buffer(bp, sizeof(*bp),
    291 	                                      offsetof(BeamformerSharedMemory, parameters), uc,
    292 	                                      BeamformerSharedMemoryLockKind_Parameters, timeout_ms);
    293 	return result;
    294 }
    295 
    296 b32
    297 beamformer_push_parameters_ui(BeamformerUIParameters *bp, i32 timeout_ms)
    298 {
    299 	BeamformerUploadContext uc = {0};
    300 	uc.shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
    301 	uc.size = sizeof(g_bp->parameters);
    302 	uc.kind = BU_KIND_PARAMETERS;
    303 	b32 result = beamformer_upload_buffer(bp, sizeof(*bp),
    304 	                                      offsetof(BeamformerSharedMemory, parameters_ui), uc,
    305 	                                      BeamformerSharedMemoryLockKind_Parameters, timeout_ms);
    306 	return result;
    307 }
    308 
    309 b32
    310 beamformer_push_parameters_head(BeamformerParametersHead *bp, i32 timeout_ms)
    311 {
    312 	BeamformerUploadContext uc = {0};
    313 	uc.shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
    314 	uc.size = sizeof(g_bp->parameters);
    315 	uc.kind = BU_KIND_PARAMETERS;
    316 	b32 result = beamformer_upload_buffer(bp, sizeof(*bp),
    317 	                                      offsetof(BeamformerSharedMemory, parameters_head), uc,
    318 	                                      BeamformerSharedMemoryLockKind_Parameters, timeout_ms);
    319 	return result;
    320 }
    321 
    322 b32
    323 set_beamformer_parameters(BeamformerParametersV0 *new_bp)
    324 {
    325 	b32 result = 1;
    326 	result &= beamformer_push_channel_mapping((i16 *)new_bp->channel_mapping,
    327 	                                          countof(new_bp->channel_mapping), 0);
    328 	result &= beamformer_push_sparse_elements((i16 *)new_bp->uforces_channels,
    329 	                                          countof(new_bp->uforces_channels), 0);
    330 	v2 focal_vectors[256];
    331 	for (u32 i = 0; i < countof(focal_vectors); i++)
    332 		focal_vectors[i] = (v2){{new_bp->transmit_angles[i], new_bp->focal_depths[i]}};
    333 	result &= beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors), 0);
    334 	result &= beamformer_push_parameters((BeamformerParameters *)&new_bp->xdc_transform, 0);
    335 	return result;
    336 }
    337 
    338 b32
    339 send_data(void *data, u32 data_size)
    340 {
    341 	b32 result = 0;
    342 	if (beamformer_push_data(data, data_size, 0))
    343 		result = beamformer_start_compute(-1);
    344 	return result;
    345 }
    346 
    347 function b32
    348 beamformer_export_buffer(BeamformerExportContext export_context)
    349 {
    350 	BeamformWork *work = try_push_work_queue();
    351 	b32 result = work != 0;
    352 	if (result) {
    353 		work->export_context = export_context;
    354 		work->kind = BeamformerWorkKind_ExportBuffer;
    355 		work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
    356 		beamform_work_queue_push_commit(&g_bp->external_work_queue);
    357 	}
    358 	return result;
    359 }
    360 
    361 function b32
    362 beamformer_read_output(void *out, iz size, i32 timeout_ms)
    363 {
    364 	b32 result = 0;
    365 	if (try_wait_sync(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
    366 		if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
    367 			mem_copy(out, (u8 *)g_bp + BEAMFORMER_SCRATCH_OFF, size);
    368 			lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
    369 			result = 1;
    370 		}
    371 	}
    372 	return result;
    373 }
    374 
    375 b32
    376 beamform_data_synchronized(void *data, u32 data_size, u32 output_points[3], f32 *out_data, i32 timeout_ms)
    377 {
    378 	b32 result = 0;
    379 	if (check_shared_memory()) {
    380 		output_points[0] = MAX(1, output_points[0]);
    381 		output_points[1] = MAX(1, output_points[1]);
    382 		output_points[2] = MAX(1, output_points[2]);
    383 
    384 		g_bp->parameters.output_points[0] = output_points[0];
    385 		g_bp->parameters.output_points[1] = output_points[1];
    386 		g_bp->parameters.output_points[2] = output_points[2];
    387 
    388 		iz output_size = output_points[0] * output_points[1] * output_points[2] * sizeof(f32) * 2;
    389 		if (output_size <= BEAMFORMER_SCRATCH_SIZE &&
    390 		    beamformer_push_data_with_compute(data, data_size, 0, 0))
    391 		{
    392 			BeamformerExportContext export;
    393 			export.kind = BeamformerExportKind_BeamformedData;
    394 			export.size = output_size;
    395 			if (beamformer_export_buffer(export) &&
    396 			    lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, 0))
    397 			{
    398 				result = beamformer_read_output(out_data, output_size, timeout_ms);
    399 			}
    400 		} else {
    401 			g_lib_last_error = BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW;
    402 		}
    403 	}
    404 	return result;
    405 }
    406 
    407 b32
    408 beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
    409 {
    410 	b32 result = 0;
    411 	if (check_shared_memory()) {
    412 		static_assert(sizeof(*output) <= BEAMFORMER_SCRATCH_SIZE, "timing table size exceeds scratch space");
    413 		BeamformerExportContext export;
    414 		export.kind = BeamformerExportKind_Stats;
    415 		export.size = sizeof(*output);
    416 
    417 		if (beamformer_export_buffer(export) &&
    418 		    lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, 0))
    419 		{
    420 			result = beamformer_read_output(output, sizeof(*output), timeout_ms);
    421 		}
    422 	}
    423 	return result;
    424 }