ogl_beamformer_lib.c
/* See LICENSE for license details. */
#include "../compiler.h"

#include "../util.h"
#include "../beamformer_parameters.h"
#include "ogl_beamformer_lib_base.h"
#include "../beamformer_work_queue.c"

global SharedMemoryRegion      g_shared_memory;
global BeamformerSharedMemory *g_bp;
global BeamformerLibErrorKind  g_lib_last_error;

#if OS_LINUX
#include "../os_linux.c"
#elif OS_WINDOWS
#include "../os_win32.c"

W32(iptr) OpenFileMappingA(u32, b32, c8 *);

#else
#error Unsupported Platform
#endif

#if OS_LINUX

function SharedMemoryRegion
os_open_shared_memory_area(char *name)
{
	SharedMemoryRegion result = {0};
	i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR);
	if (fd > 0) {
		void *new = mmap(0, BEAMFORMER_SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
		if (new != MAP_FAILED) result.region = new;
		close(fd);
	}
	return result;
}

#elif OS_WINDOWS

function SharedMemoryRegion
os_open_shared_memory_area(char *name)
{
	SharedMemoryRegion result = {0};
	iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name);
	if (h != INVALID_FILE) {
		void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0,
		                          os_round_up_to_page_size(BEAMFORMER_SHARED_MEMORY_SIZE));
		if (new) {
			u8 buffer[1024];
			Stream sb = {.data = buffer, .cap = 1024};
			stream_append_s8s(&sb, c_str_to_s8(name), s8("_lock_"));
			local_persist iptr semaphores[BeamformerSharedMemoryLockKind_Count];
			local_persist w32_shared_memory_context ctx = {.semaphores = semaphores};
			b32 all_semaphores = 1;
			for (i32 i = 0; i < countof(semaphores); i++) {
				Stream lb = sb;
				stream_append_i64(&lb, i);
				stream_append_byte(&lb, 0);
				semaphores[i] = CreateSemaphoreA(0, 1, 1, (c8 *)lb.data);
				all_semaphores &= semaphores[i] != INVALID_FILE;
			}
			if (all_semaphores) {
				result.region     = new;
				result.os_context = (iptr)&ctx;
			}
		}
		CloseHandle(h);
	}
	return result;
}

#endif

function b32
check_shared_memory(void)
{
	b32 result = 1;
	if (!g_shared_memory.region) {
		g_shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME);
		if (!g_shared_memory.region) {
			g_lib_last_error = BF_LIB_ERR_KIND_SHARED_MEMORY;
			result = 0;
		} else if (((BeamformerSharedMemory *)g_shared_memory.region)->version !=
		           BEAMFORMER_SHARED_MEMORY_VERSION)
		{
			g_lib_last_error = BF_LIB_ERR_KIND_VERSION_MISMATCH;
			result = 0;
		}
	}
	if (result && ((BeamformerSharedMemory *)g_shared_memory.region)->invalid) {
		g_lib_last_error = BF_LIB_ERR_KIND_INVALID_ACCESS;
		result = 0;
	}
	if (result) g_bp = g_shared_memory.region;
	return result;
}

function BeamformWork *
try_push_work_queue(void)
{
	BeamformWork *result = beamform_work_queue_push(&g_bp->external_work_queue);
	if (!result) g_lib_last_error = BF_LIB_ERR_KIND_WORK_QUEUE_FULL;
	return result;
}

function b32
lib_try_lock(BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
{
	b32 result = os_shared_memory_region_lock(&g_shared_memory, g_bp->locks, (i32)lock, timeout_ms);
	if (!result) g_lib_last_error = BF_LIB_ERR_KIND_SYNC_VARIABLE;
	return result;
}

function void
lib_release_lock(BeamformerSharedMemoryLockKind lock)
{
	os_shared_memory_region_unlock(&g_shared_memory, g_bp->locks, (i32)lock);
}

function b32
try_wait_sync(BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
{
	b32 result = 0;
	if (lib_try_lock(lock, 0) && lib_try_lock(lock, timeout_ms)) {
		/* TODO(rnp): non-critical race condition */
		lib_release_lock(lock);
		result = 1;
	}
	return result;
}

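/* Usage sketch (illustrative only, not part of the original source): the b32-returning
 * entry points in this library return 0 on failure and record a BeamformerLibErrorKind,
 * which a caller can report with the error-string helpers below. `rf_data` and `rf_size`
 * are hypothetical caller-provided values and the caller is assumed to include <stdio.h>:
 *
 *     if (!beamformer_push_data(rf_data, rf_size, 100))
 *         fprintf(stderr, "upload failed: %s\n", beamformer_get_last_error_string());
 */
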
u32
beamformer_get_api_version(void)
{
	return BEAMFORMER_SHARED_MEMORY_VERSION;
}

const char *
beamformer_error_string(BeamformerLibErrorKind kind)
{
	#define X(type, num, string) string,
	local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"};
	#undef X
	return error_string_table[MIN(kind, countof(error_string_table) - 1)];
}

BeamformerLibErrorKind
beamformer_get_last_error(void)
{
	return g_lib_last_error;
}

const char *
beamformer_get_last_error_string(void)
{
	return beamformer_error_string(beamformer_get_last_error());
}

b32
set_beamformer_pipeline(i32 *stages, i32 stages_count)
{
	b32 result = 0;
	if (stages_count <= countof(g_bp->compute_stages)) {
		if (check_shared_memory()) {
			g_bp->compute_stages_count = 0;
			for (i32 i = 0; i < stages_count; i++) {
				if (BETWEEN(stages[i], 0, BeamformerShaderKind_ComputeCount)) {
					g_bp->compute_stages[g_bp->compute_stages_count++] = stages[i];
				}
			}
			result = g_bp->compute_stages_count == stages_count;
			if (!result) {
				g_lib_last_error = BF_LIB_ERR_KIND_INVALID_COMPUTE_STAGE;
				g_bp->compute_stages_count = 0;
			}
		}
	} else {
		g_lib_last_error = BF_LIB_ERR_KIND_COMPUTE_STAGE_OVERFLOW;
	}
	return result;
}

b32
beamformer_start_compute(i32 timeout_ms)
{
	b32 result = check_shared_memory() &&
	             try_wait_sync(BeamformerSharedMemoryLockKind_DispatchCompute, timeout_ms);
	return result;
}

function b32
beamformer_upload_buffer(void *data, u32 size, i32 store_offset, BeamformerUploadContext upload_context,
                         BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
{
	b32 result = 0;
	if (check_shared_memory()) {
		BeamformWork *work = try_push_work_queue();
		result = work && lib_try_lock(lock, timeout_ms);
		if (result) {
			work->upload_context = upload_context;
			work->kind = BeamformerWorkKind_UploadBuffer;
			work->lock = lock;
			mem_copy((u8 *)g_bp + store_offset, data, size);
			if ((atomic_load_u32(&g_bp->dirty_regions) & (1 << (lock - 1))) == 0) {
				atomic_or_u32(&g_bp->dirty_regions, (1 << (lock - 1)));
				beamform_work_queue_push_commit(&g_bp->external_work_queue);
			}
			lib_release_lock(lock);
		}
	}
	return result;
}

#define BEAMFORMER_UPLOAD_FNS \
	X(channel_mapping, i16, 1, ChannelMapping, CHANNEL_MAPPING) \
	X(sparse_elements, i16, 1, SparseElements, SPARSE_ELEMENTS) \
	X(focal_vectors,   f32, 2, FocalVectors,   FOCAL_VECTORS)

#define X(name, dtype, elements, lock_name, command) \
b32 beamformer_push_##name (dtype *data, u32 count, i32 timeout_ms) { \
	b32 result = 0; \
	if (count <= countof(g_bp->name)) { \
		BeamformerUploadContext uc = {0}; \
		uc.shared_memory_offset = offsetof(BeamformerSharedMemory, name); \
		uc.kind = BU_KIND_##command; \
		uc.size = count * elements * sizeof(dtype); \
		result = beamformer_upload_buffer(data, uc.size, uc.shared_memory_offset, uc, \
		                                  BeamformerSharedMemoryLockKind_##lock_name, timeout_ms); \
	} else { \
		g_lib_last_error = BF_LIB_ERR_KIND_BUFFER_OVERFLOW; \
	} \
	return result; \
}
BEAMFORMER_UPLOAD_FNS
#undef X

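/* For reference (sketch of the expansion above): BEAMFORMER_UPLOAD_FNS generates one
 * upload helper per table entry, with signatures of the form:
 *
 *     b32 beamformer_push_channel_mapping(i16 *data, u32 count, i32 timeout_ms);
 *     b32 beamformer_push_sparse_elements(i16 *data, u32 count, i32 timeout_ms);
 *     b32 beamformer_push_focal_vectors  (f32 *data, u32 count, i32 timeout_ms);
 *
 * Each helper bounds-checks `count` against the matching shared-memory array and then
 * forwards to beamformer_upload_buffer() with the corresponding lock and upload kind.
 */
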
function b32
beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, b32 start_from_main)
{
	b32 result = 0;
	if (data_size <= BEAMFORMER_MAX_RF_DATA_SIZE) {
		BeamformerUploadContext uc = {0};
		uc.shared_memory_offset = BEAMFORMER_SCRATCH_OFF;
		uc.size = data_size;
		uc.kind = BU_KIND_RF_DATA;
		result = beamformer_upload_buffer(data, data_size, uc.shared_memory_offset, uc,
		                                  BeamformerSharedMemoryLockKind_ScratchSpace, timeout_ms);
		if (result && start_from_main) atomic_store_u32(&g_bp->start_compute_from_main, 1);
	} else {
		g_lib_last_error = BF_LIB_ERR_KIND_BUFFER_OVERFLOW;
	}
	return result;
}

b32
beamformer_push_data(void *data, u32 data_size, i32 timeout_ms)
{
	return beamformer_push_data_base(data, data_size, timeout_ms, 1);
}

b32
beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, i32 timeout_ms)
{
	b32 result = beamformer_push_data_base(data, data_size, timeout_ms, 0);
	if (result) {
		result = image_plane_tag < IPT_LAST;
		if (result) {
			BeamformWork *work = try_push_work_queue();
			result = work != 0;
			if (result) {
				work->kind = BeamformerWorkKind_ComputeIndirect;
				work->compute_indirect_plane = image_plane_tag;
				beamform_work_queue_push_commit(&g_bp->external_work_queue);
			}
		} else {
			g_lib_last_error = BF_LIB_ERR_KIND_INVALID_IMAGE_PLANE;
		}
	}
	return result;
}

b32
beamformer_push_parameters(BeamformerParameters *bp, i32 timeout_ms)
{
	BeamformerUploadContext uc = {0};
	uc.shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
	uc.size = sizeof(g_bp->parameters);
	uc.kind = BU_KIND_PARAMETERS;
	b32 result = beamformer_upload_buffer(bp, sizeof(*bp),
	                                      offsetof(BeamformerSharedMemory, parameters), uc,
	                                      BeamformerSharedMemoryLockKind_Parameters, timeout_ms);
	return result;
}

b32
beamformer_push_parameters_ui(BeamformerUIParameters *bp, i32 timeout_ms)
{
	BeamformerUploadContext uc = {0};
	uc.shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
	uc.size = sizeof(g_bp->parameters);
	uc.kind = BU_KIND_PARAMETERS;
	b32 result = beamformer_upload_buffer(bp, sizeof(*bp),
	                                      offsetof(BeamformerSharedMemory, parameters_ui), uc,
	                                      BeamformerSharedMemoryLockKind_Parameters, timeout_ms);
	return result;
}

b32
beamformer_push_parameters_head(BeamformerParametersHead *bp, i32 timeout_ms)
{
	BeamformerUploadContext uc = {0};
	uc.shared_memory_offset = offsetof(BeamformerSharedMemory, parameters);
	uc.size = sizeof(g_bp->parameters);
	uc.kind = BU_KIND_PARAMETERS;
	b32 result = beamformer_upload_buffer(bp, sizeof(*bp),
	                                      offsetof(BeamformerSharedMemory, parameters_head), uc,
	                                      BeamformerSharedMemoryLockKind_Parameters, timeout_ms);
	return result;
}

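/* Usage sketch (illustrative only): the three push_parameters variants above copy the
 * caller's struct to the offset of the corresponding shared-memory member (parameters,
 * parameters_ui, parameters_head), all guarded by the Parameters lock. `params` is a
 * hypothetical caller-owned value:
 *
 *     BeamformerParameters params = {0};
 *     // ... fill in acquisition / imaging parameters ...
 *     if (!beamformer_push_parameters(&params, 100))
 *         fprintf(stderr, "%s\n", beamformer_get_last_error_string());
 */
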
b32
set_beamformer_parameters(BeamformerParametersV0 *new_bp)
{
	b32 result = 1;
	result &= beamformer_push_channel_mapping((i16 *)new_bp->channel_mapping,
	                                          countof(new_bp->channel_mapping), 0);
	result &= beamformer_push_sparse_elements((i16 *)new_bp->uforces_channels,
	                                          countof(new_bp->uforces_channels), 0);
	v2 focal_vectors[256];
	for (u32 i = 0; i < countof(focal_vectors); i++)
		focal_vectors[i] = (v2){{new_bp->transmit_angles[i], new_bp->focal_depths[i]}};
	result &= beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors), 0);
	result &= beamformer_push_parameters((BeamformerParameters *)&new_bp->xdc_transform, 0);
	return result;
}

b32
send_data(void *data, u32 data_size)
{
	b32 result = 0;
	if (beamformer_push_data(data, data_size, 0))
		result = beamformer_start_compute(-1);
	return result;
}

function b32
beamformer_export_buffer(BeamformerExportContext export_context)
{
	BeamformWork *work = try_push_work_queue();
	b32 result = work != 0;
	if (result) {
		work->export_context = export_context;
		work->kind = BeamformerWorkKind_ExportBuffer;
		work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
		beamform_work_queue_push_commit(&g_bp->external_work_queue);
	}
	return result;
}

function b32
beamformer_read_output(void *out, iz size, i32 timeout_ms)
{
	b32 result = 0;
	if (try_wait_sync(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
		if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
			mem_copy(out, (u8 *)g_bp + BEAMFORMER_SCRATCH_OFF, size);
			lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
			result = 1;
		}
	}
	return result;
}

b32
beamform_data_synchronized(void *data, u32 data_size, u32 output_points[3], f32 *out_data, i32 timeout_ms)
{
	b32 result = 0;
	if (check_shared_memory()) {
		output_points[0] = MAX(1, output_points[0]);
		output_points[1] = MAX(1, output_points[1]);
		output_points[2] = MAX(1, output_points[2]);

		g_bp->parameters.output_points[0] = output_points[0];
		g_bp->parameters.output_points[1] = output_points[1];
		g_bp->parameters.output_points[2] = output_points[2];

		iz output_size = output_points[0] * output_points[1] * output_points[2] * sizeof(f32) * 2;
		if (output_size <= BEAMFORMER_SCRATCH_SIZE &&
		    beamformer_push_data_with_compute(data, data_size, 0, 0))
		{
			BeamformerExportContext export;
			export.kind = BeamformerExportKind_BeamformedData;
			export.size = output_size;
			if (beamformer_export_buffer(export) &&
			    lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, 0))
			{
				result = beamformer_read_output(out_data, output_size, timeout_ms);
			}
		} else {
			g_lib_last_error = BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW;
		}
	}
	return result;
}

b32
beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
{
	b32 result = 0;
	if (check_shared_memory()) {
		static_assert(sizeof(*output) <= BEAMFORMER_SCRATCH_SIZE, "timing table size exceeds scratch space");
		BeamformerExportContext export;
		export.kind = BeamformerExportKind_Stats;
		export.size = sizeof(*output);

		if (beamformer_export_buffer(export) &&
		    lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, 0))
		{
			result = beamformer_read_output(output, sizeof(*output), timeout_ms);
		}
	}
	return result;
}
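
/* End-to-end usage sketch (illustrative only): `rf_data` and `rf_size` are hypothetical
 * caller-provided values; the output buffer must hold x * y * z complex samples
 * (2 f32 each), matching the size computed in beamform_data_synchronized(). The caller
 * is assumed to include <stdlib.h> for malloc/free:
 *
 *     u32 points[3] = {512, 1, 1024};
 *     f32 *out = malloc(points[0] * points[1] * points[2] * 2 * sizeof(f32));
 *     if (out && beamform_data_synchronized(rf_data, rf_size, points, out, 1000)) {
 *         // `out` now contains the exported beamformed volume
 *     }
 *     free(out);
 */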