throughput.c (19183B)
1 /* See LICENSE for license details. */ 2 /* TODO(rnp): 3 * [ ]: for finer grained evaluation of throughput latency just queue a data upload 4 * without replacing the data. 5 * [ ]: bug: we aren't inserting rf data between each frame 6 */ 7 8 #define BEAMFORMER_LIB_EXPORT function 9 #include "ogl_beamformer_lib.c" 10 11 #include <signal.h> 12 #include <stdarg.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <zstd.h> 16 17 global iv3 g_output_points = {{512, 1, 1024}}; 18 global v2 g_axial_extent = {{ 10e-3f, 165e-3f}}; 19 global v2 g_lateral_extent = {{-60e-3f, 60e-3f}}; 20 global f32 g_f_number = 0.5f; 21 22 typedef struct { 23 b32 loop; 24 b32 cuda; 25 u32 frame_number; 26 27 char **remaining; 28 i32 remaining_count; 29 } Options; 30 31 #include "external/zemp_bp.h" 32 33 typedef struct { 34 ZBP_DataKind kind; 35 ZBP_DataCompressionKind compression_kind; 36 s8 bytes; 37 } ZBP_Data; 38 39 global b32 g_should_exit; 40 41 #define die(...) die_((char *)__func__, __VA_ARGS__) 42 function no_return void 43 die_(char *function_name, char *format, ...) 44 { 45 if (function_name) 46 fprintf(stderr, "%s: ", function_name); 47 48 va_list ap; 49 50 va_start(ap, format); 51 vfprintf(stderr, format, ap); 52 va_end(ap); 53 54 os_exit(1); 55 } 56 57 #if OS_LINUX 58 59 #include <fcntl.h> 60 #include <sys/stat.h> 61 #include <unistd.h> 62 63 function s8 64 os_read_file_simp(char *fname) 65 { 66 s8 result; 67 i32 fd = open(fname, O_RDONLY); 68 if (fd < 0) 69 die("couldn't open file: %s\n", fname); 70 71 struct stat st; 72 if (stat(fname, &st) < 0) 73 die("couldn't stat file\n"); 74 75 result.len = st.st_size; 76 result.data = malloc((uz)st.st_size); 77 if (!result.data) 78 die("couldn't alloc space for reading\n"); 79 80 iz rlen = read(fd, result.data, (u32)st.st_size); 81 close(fd); 82 83 if (rlen != st.st_size) 84 die("couldn't read file: %s\n", fname); 85 86 return result; 87 } 88 89 #elif OS_WINDOWS 90 91 function s8 92 os_read_file_simp(char *fname) 93 { 94 s8 result; 95 iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); 96 if (h == INVALID_FILE) 97 die("couldn't open file: %s\n", fname); 98 99 w32_file_info fileinfo; 100 if (!GetFileInformationByHandle(h, &fileinfo)) 101 die("couldn't get file info\n", stderr); 102 103 result.len = fileinfo.nFileSizeLow; 104 result.data = malloc(fileinfo.nFileSizeLow); 105 if (!result.data) 106 die("couldn't alloc space for reading\n"); 107 108 i32 rlen = 0; 109 if (!ReadFile(h, result.data, (i32)fileinfo.nFileSizeLow, &rlen, 0) && rlen != (i32)fileinfo.nFileSizeLow) 110 die("couldn't read file: %s\n", fname); 111 CloseHandle(h); 112 113 return result; 114 } 115 116 #else 117 #error Unsupported Platform 118 #endif 119 120 function void 121 stream_ensure_termination(Stream *s, u8 byte) 122 { 123 b32 found = 0; 124 if (!s->errors && s->widx > 0) 125 found = s->data[s->widx - 1] == byte; 126 if (!found) { 127 s->errors |= s->cap - 1 < s->widx; 128 if (!s->errors) 129 s->data[s->widx++] = byte; 130 } 131 } 132 133 function void * 134 decompress_zstd_data(s8 raw) 135 { 136 uz requested_size = ZSTD_getFrameContentSize(raw.data, (uz)raw.len); 137 void *out = malloc(requested_size); 138 if (out) { 139 uz decompressed = ZSTD_decompress(out, requested_size, raw.data, (uz)raw.len); 140 if (decompressed != requested_size) { 141 free(out); 142 out = 0; 143 } 144 } 145 return out; 146 } 147 148 function b32 149 beamformer_simple_parameters_from_zbp_file(BeamformerSimpleParameters *bp, char *path, ZBP_Data *raw_data) 150 { 151 s8 raw = os_read_file_simp(path); 152 if (raw.len < (iz)sizeof(ZBP_BaseHeader) || ((ZBP_BaseHeader *)raw.data)->magic != ZBP_HeaderMagic) 153 return 0; 154 155 switch (((ZBP_BaseHeader *)raw.data)->major) { 156 157 case 1:{ 158 ZBP_HeaderV1 *header = (ZBP_HeaderV1 *)raw.data; 159 160 bp->sample_count = header->sample_count; 161 bp->channel_count = header->channel_count; 162 bp->acquisition_count = header->receive_event_count; 163 164 bp->sampling_mode = BeamformerSamplingMode_4X; 165 bp->acquisition_kind = header->beamform_mode; 166 bp->decode_mode = header->decode_mode; 167 bp->sampling_frequency = header->sampling_frequency; 168 bp->demodulation_frequency = header->sampling_frequency / 4; 169 bp->speed_of_sound = header->speed_of_sound; 170 bp->time_offset = header->time_offset; 171 172 mem_copy(bp->channel_mapping, header->channel_mapping, sizeof(*bp->channel_mapping) * bp->channel_count); 173 mem_copy(bp->xdc_transform.E, header->transducer_transform_matrix, sizeof(bp->xdc_transform)); 174 mem_copy(bp->xdc_element_pitch.E, header->transducer_element_pitch, sizeof(bp->xdc_element_pitch)); 175 // NOTE(rnp): ignores emission count and ensemble count 176 mem_copy(bp->raw_data_dimensions.E, header->raw_data_dimension, sizeof(bp->raw_data_dimensions)); 177 178 bp->data_kind = (BeamformerDataKind)ZBP_DataKind_Int16; 179 raw_data->kind = ZBP_DataKind_Int16; 180 raw_data->compression_kind = ZBP_DataCompressionKind_ZSTD; 181 182 read_only local_persist u8 transmit_mode_to_orientation[] = { 183 [0] = (ZBP_RCAOrientation_Rows << 4) | ZBP_RCAOrientation_Rows, 184 [1] = (ZBP_RCAOrientation_Rows << 4) | ZBP_RCAOrientation_Columns, 185 [2] = (ZBP_RCAOrientation_Columns << 4) | ZBP_RCAOrientation_Rows, 186 [3] = (ZBP_RCAOrientation_Columns << 4) | ZBP_RCAOrientation_Columns, 187 }; 188 if (header->transmit_mode >= countof(transmit_mode_to_orientation)) 189 return 0; 190 191 bp->transmit_receive_orientation = transmit_mode_to_orientation[header->transmit_mode]; 192 193 ZBP_AcquisitionKind acquisition_kind = header->beamform_mode; 194 if (acquisition_kind == ZBP_AcquisitionKind_FORCES || 195 acquisition_kind == ZBP_AcquisitionKind_HERCULES || 196 acquisition_kind == ZBP_AcquisitionKind_UFORCES || 197 acquisition_kind == ZBP_AcquisitionKind_UHERCULES) 198 { 199 bp->single_focus = 1; 200 bp->single_orientation = 1; 201 bp->focal_vector.E[0] = header->steering_angles[0]; 202 bp->focal_vector.E[1] = header->focal_depths[0]; 203 } 204 205 if (acquisition_kind == ZBP_AcquisitionKind_UFORCES || 206 acquisition_kind == ZBP_AcquisitionKind_UHERCULES) 207 { 208 mem_copy(bp->sparse_elements, header->sparse_elements, sizeof(*bp->sparse_elements) * bp->acquisition_count); 209 } 210 211 if (acquisition_kind == ZBP_AcquisitionKind_RCA_TPW || 212 acquisition_kind == ZBP_AcquisitionKind_RCA_VLS) 213 { 214 mem_copy(bp->focal_depths, header->focal_depths, sizeof(*bp->focal_depths) * bp->acquisition_count); 215 mem_copy(bp->steering_angles, header->steering_angles, sizeof(*bp->steering_angles) * bp->acquisition_count); 216 for EachIndex(bp->acquisition_count, it) 217 bp->transmit_receive_orientations[it] = bp->transmit_receive_orientation; 218 } 219 220 bp->emission_parameters.kind = BeamformerEmissionKind_Sine; 221 bp->emission_parameters.sine.cycles = 2; 222 bp->emission_parameters.sine.frequency = bp->demodulation_frequency; 223 }break; 224 225 case 2:{ 226 ZBP_HeaderV2 *header = (ZBP_HeaderV2 *)raw.data; 227 228 bp->sample_count = header->sample_count; 229 bp->channel_count = header->channel_count; 230 bp->acquisition_count = header->receive_event_count; 231 232 read_only local_persist BeamformerSamplingMode zbp_sampling_mode_to_beamformer[] = { 233 [ZBP_SamplingMode_Standard] = BeamformerSamplingMode_4X, 234 [ZBP_SamplingMode_Bandpass] = BeamformerSamplingMode_2X, 235 }; 236 bp->sampling_mode = zbp_sampling_mode_to_beamformer[header->sampling_mode]; 237 238 bp->acquisition_kind = header->acquisition_mode; 239 bp->decode_mode = header->decode_mode; 240 bp->sampling_frequency = header->sampling_frequency; 241 bp->demodulation_frequency = header->demodulation_frequency; 242 bp->speed_of_sound = header->speed_of_sound; 243 bp->time_offset = header->time_offset; 244 245 bp->contrast_mode = header->contrast_mode; 246 247 if (header->channel_mapping_offset != -1) { 248 mem_copy(bp->channel_mapping, raw.data + header->channel_mapping_offset, 249 sizeof(*bp->channel_mapping) * bp->channel_count); 250 } else { 251 for EachIndex(bp->channel_count, it) 252 bp->channel_mapping[it] = it; 253 } 254 255 mem_copy(bp->xdc_transform.E, header->transducer_transform_matrix, sizeof(bp->xdc_transform)); 256 mem_copy(bp->xdc_element_pitch.E, header->transducer_element_pitch, sizeof(bp->xdc_element_pitch)); 257 // NOTE(rnp): ignores group count and ensemble count 258 mem_copy(bp->raw_data_dimensions.E, header->raw_data_dimension, sizeof(bp->raw_data_dimensions)); 259 260 bp->data_kind = header->raw_data_kind; 261 raw_data->kind = header->raw_data_kind; 262 raw_data->compression_kind = header->raw_data_compression_kind; 263 264 if (header->raw_data_offset != -1) { 265 raw_data->bytes.data = raw.data + header->raw_data_offset; 266 if (raw_data->compression_kind == ZBP_DataCompressionKind_ZSTD) { 267 // NOTE(rnp): limitation in the header format 268 raw_data->bytes.len = raw.len - header->raw_data_offset; 269 } else { 270 raw_data->bytes.len = header->raw_data_dimension[0] * header->raw_data_dimension[1] * 271 header->raw_data_dimension[2] * header->raw_data_dimension[3]; 272 raw_data->bytes.len *= beamformer_data_kind_byte_size[header->raw_data_kind]; 273 } 274 } 275 276 // NOTE(rnp): only look at the first emission descriptor, other cases aren't currently relevant 277 { 278 ZBP_EmissionDescriptor *ed = (ZBP_EmissionDescriptor *)(raw.data + header->emission_descriptors_offset); 279 switch (ed->emission_kind) { 280 281 case ZBP_EmissionKind_Sine:{ 282 ZBP_EmissionSineParameters *ep = (ZBP_EmissionSineParameters *)(raw.data + ed->parameters_offset); 283 bp->emission_parameters.kind = BeamformerEmissionKind_Sine; 284 bp->emission_parameters.sine.cycles = ep->cycles; 285 bp->emission_parameters.sine.frequency = ep->frequency; 286 }break; 287 288 case ZBP_EmissionKind_Chirp:{ 289 ZBP_EmissionChirpParameters *ep = (ZBP_EmissionChirpParameters *)(raw.data + ed->parameters_offset); 290 bp->emission_parameters.kind = BeamformerEmissionKind_Chirp; 291 bp->emission_parameters.chirp.duration = ep->duration; 292 bp->emission_parameters.chirp.min_frequency = ep->min_frequency; 293 bp->emission_parameters.chirp.max_frequency = ep->max_frequency; 294 }break; 295 296 InvalidDefaultCase; 297 static_assert(ZBP_EmissionKind_Count == (ZBP_EmissionKind_Chirp + 1), ""); 298 } 299 } 300 301 switch (header->acquisition_mode) { 302 case ZBP_AcquisitionKind_FORCES:{}break; 303 304 case ZBP_AcquisitionKind_HERCULES:{ 305 ZBP_HERCULESParameters *p = (ZBP_HERCULESParameters *)(raw.data + header->acquisition_parameters_offset); 306 bp->transmit_receive_orientation = p->transmit_focus.transmit_receive_orientation; 307 bp->focal_vector.E[0] = p->transmit_focus.steering_angle; 308 bp->focal_vector.E[1] = p->transmit_focus.focal_depth; 309 310 bp->single_focus = 1; 311 bp->single_orientation = 1; 312 }break; 313 314 case ZBP_AcquisitionKind_UFORCES:{ 315 ZBP_uFORCESParameters *p = (ZBP_uFORCESParameters *)(raw.data + header->acquisition_parameters_offset); 316 mem_copy(bp->sparse_elements, raw.data + p->sparse_elements_offset, 317 sizeof(*bp->sparse_elements) * bp->acquisition_count); 318 }break; 319 320 case ZBP_AcquisitionKind_UHERCULES:{ 321 ZBP_uHERCULESParameters *p = (ZBP_uHERCULESParameters *)(raw.data + header->acquisition_parameters_offset); 322 bp->transmit_receive_orientation = p->transmit_focus.transmit_receive_orientation; 323 bp->focal_vector.E[0] = p->transmit_focus.steering_angle; 324 bp->focal_vector.E[1] = p->transmit_focus.focal_depth; 325 326 bp->single_focus = 1; 327 bp->single_orientation = 1; 328 329 mem_copy(bp->sparse_elements, raw.data + p->sparse_elements_offset, 330 sizeof(*bp->sparse_elements) * bp->acquisition_count); 331 }break; 332 333 case ZBP_AcquisitionKind_RCA_TPW:{ 334 ZBP_TPWParameters *p = (ZBP_TPWParameters *)(raw.data + header->acquisition_parameters_offset); 335 336 mem_copy(bp->transmit_receive_orientations, raw.data + p->transmit_receive_orientations_offset, 337 sizeof(*bp->transmit_receive_orientations) * bp->acquisition_count); 338 mem_copy(bp->steering_angles, raw.data + p->tilting_angles_offset, 339 sizeof(*bp->steering_angles) * bp->acquisition_count); 340 341 for EachIndex(bp->acquisition_count, it) 342 bp->focal_depths[it] = F32_INFINITY; 343 }break; 344 345 case ZBP_AcquisitionKind_RCA_VLS:{ 346 ZBP_VLSParameters *p = (ZBP_VLSParameters *)(raw.data + header->acquisition_parameters_offset); 347 348 mem_copy(bp->transmit_receive_orientations, raw.data + p->transmit_receive_orientations_offset, 349 sizeof(*bp->transmit_receive_orientations) * bp->acquisition_count); 350 351 f32 *focal_depths = (f32 *)(raw.data + p->focal_depths_offset); 352 f32 *origin_offsets = (f32 *)(raw.data + p->origin_offsets_offset); 353 354 for EachIndex(bp->acquisition_count, it) { 355 f32 sign = Sign(focal_depths[it]); 356 f32 depth = focal_depths[it]; 357 f32 origin = origin_offsets[it]; 358 bp->steering_angles[it] = atan2_f32(origin, -depth) * 180.0f / PI; 359 bp->focal_depths[it] = sign * sqrt_f32(depth * depth + origin * origin); 360 } 361 }break; 362 363 InvalidDefaultCase; 364 } 365 366 }break; 367 368 default:{return 0;}break; 369 } 370 371 return 1; 372 } 373 374 #define shift_n(v, c, n) v += n, c -= n 375 #define shift(v, c) shift_n(v, c, 1) 376 377 function void 378 usage(char *argv0) 379 { 380 die("%s [--loop] [--cuda] [--frame n] parameters_file\n" 381 " --loop: reupload data forever\n" 382 " --cuda: use cuda for decoding\n" 383 " --frame n: use frame n of the data for display\n", 384 argv0); 385 } 386 387 function Options 388 parse_argv(i32 argc, char *argv[]) 389 { 390 Options result = {0}; 391 392 char *argv0 = argv[0]; 393 shift(argv, argc); 394 395 while (argc > 0) { 396 s8 arg = c_str_to_s8(*argv); 397 398 if (s8_equal(arg, s8("--loop"))) { 399 shift(argv, argc); 400 result.loop = 1; 401 } else if (s8_equal(arg, s8("--cuda"))) { 402 shift(argv, argc); 403 result.cuda = 1; 404 } else if (s8_equal(arg, s8("--frame"))) { 405 shift(argv, argc); 406 if (argc) { 407 result.frame_number = (u32)atoi(*argv); 408 shift(argv, argc); 409 } 410 } else if (arg.len > 0 && arg.data[0] == '-') { 411 usage(argv0); 412 } else { 413 break; 414 } 415 } 416 417 result.remaining = argv; 418 result.remaining_count = argc; 419 420 return result; 421 } 422 423 function b32 424 send_frame(void *restrict data, BeamformerSimpleParameters *restrict bp) 425 { 426 u32 data_size = bp->raw_data_dimensions.E[0] * bp->raw_data_dimensions.E[1] 427 * beamformer_data_kind_byte_size[bp->data_kind]; 428 b32 result = beamformer_push_data_with_compute(data, data_size, BeamformerViewPlaneTag_XZ, 0); 429 if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string()); 430 431 return result; 432 } 433 434 function void 435 execute_study(Arena arena, Stream path, Options *options) 436 { 437 i32 path_work_index = path.widx; 438 stream_ensure_termination(&path, 0); 439 440 ZBP_Data raw_data = {0}; 441 BeamformerSimpleParameters bp = {0}; 442 if (!beamformer_simple_parameters_from_zbp_file(&bp, (char *)path.data, &raw_data)) 443 die("failed to load parameters file: %s\n", (char *)path.data); 444 445 v3 min_coordinate = (v3){{g_lateral_extent.x, g_axial_extent.x, 0}}; 446 v3 max_coordinate = (v3){{g_lateral_extent.y, g_axial_extent.y, 0}}; 447 bp.das_voxel_transform = das_transform(min_coordinate, max_coordinate, &g_output_points); 448 449 bp.output_points.xyz = g_output_points; 450 bp.output_points.w = 1; 451 452 bp.f_number = g_f_number; 453 bp.interpolation_mode = BeamformerInterpolationMode_Cubic; 454 455 bp.decimation_rate = 1; 456 457 if (bp.data_kind != BeamformerDataKind_Float32Complex && 458 bp.data_kind != BeamformerDataKind_Int16Complex) 459 { 460 bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Demodulate; 461 } 462 if (options->cuda) bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_CudaDecode; 463 else bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Decode; 464 bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_DAS; 465 466 { 467 BeamformerFilterParameters filter = {.sampling_frequency = bp.sampling_frequency / 2}; 468 469 BeamformerEmissionParameters *ep = &bp.emission_parameters; 470 switch (bp.emission_parameters.kind) { 471 472 case BeamformerEmissionKind_Sine:{ 473 filter.kind = BeamformerFilterKind_Kaiser; 474 filter.kaiser.beta = 5.65f; 475 filter.kaiser.cutoff_frequency = 0.5f * ep->sine.frequency; 476 filter.kaiser.length = 36; 477 }break; 478 479 case BeamformerEmissionKind_Chirp:{ 480 filter.kind = BeamformerFilterKind_MatchedChirp; 481 filter.matched_chirp.duration = ep->chirp.duration; 482 filter.matched_chirp.min_frequency = ep->chirp.min_frequency - bp.demodulation_frequency; 483 filter.matched_chirp.max_frequency = ep->chirp.max_frequency - bp.demodulation_frequency; 484 filter.complex = 1; 485 486 //bp.time_offset += ep->chirp.duration / 2; 487 }break; 488 489 InvalidDefaultCase; 490 } 491 492 beamformer_create_filter(&filter, 0, 0); 493 494 bp.compute_stage_parameters[0] = 0; 495 } 496 497 beamformer_push_simple_parameters(&bp); 498 499 beamformer_set_global_timeout(1000); 500 501 void *data = 0; 502 if (raw_data.bytes.len == 0) { 503 // NOTE(rnp): strip ".bp" 504 stream_reset(&path, path_work_index - 3); 505 506 stream_append_byte(&path, '_'); 507 stream_append_u64_width(&path, options->frame_number, 2); 508 stream_append_s8(&path, s8(".zst")); 509 stream_ensure_termination(&path, 0); 510 s8 compressed_data = os_read_file_simp((char *)path.data); 511 512 data = decompress_zstd_data(compressed_data); 513 if (!data) 514 die("failed to decompress data: %s\n", path.data); 515 free(compressed_data.data); 516 } else { 517 if (raw_data.compression_kind == ZBP_DataCompressionKind_ZSTD) { 518 data = decompress_zstd_data(raw_data.bytes); 519 if (!data) 520 die("failed to decompress data: %s\n", path.data); 521 } else { 522 data = raw_data.bytes.data; 523 } 524 } 525 526 if (options->loop) { 527 BeamformerLiveImagingParameters lip = {.active = 1, .save_enabled = 1}; 528 s8 short_name = s8("Throughput"); 529 mem_copy(lip.save_name_tag, short_name.data, (uz)short_name.len); 530 lip.save_name_tag_length = (i32)short_name.len; 531 beamformer_set_live_parameters(&lip); 532 533 u32 frame = 0; 534 f32 times[32] = {0}; 535 f32 data_size = (f32)(bp.raw_data_dimensions.E[0] * bp.raw_data_dimensions.E[1] 536 * beamformer_data_kind_byte_size[bp.data_kind]); 537 u64 start = os_timer_count(); 538 f64 frequency = os_timer_frequency(); 539 for (;!g_should_exit;) { 540 if (send_frame(data, &bp)) { 541 u64 now = os_timer_count(); 542 f64 delta = (now - start) / frequency; 543 start = now; 544 545 if ((frame % 16) == 0) { 546 f32 sum = 0; 547 for (u32 i = 0; i < countof(times); i++) 548 sum += times[i] / countof(times); 549 printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n", 550 delta * 1e3, sum * 1e3, data_size / (sum * (GB(1)))); 551 } 552 553 times[frame % countof(times)] = delta; 554 frame++; 555 } 556 i32 flag = beamformer_live_parameters_get_dirty_flag(); 557 if (flag != -1 && (1 << flag) == BeamformerLiveImagingDirtyFlags_StopImaging) 558 break; 559 } 560 561 lip.active = 0; 562 beamformer_set_live_parameters(&lip); 563 } else { 564 send_frame(data, &bp); 565 } 566 } 567 568 function void 569 sigint(i32 _signo) 570 { 571 g_should_exit = 1; 572 } 573 574 extern i32 575 main(i32 argc, char *argv[]) 576 { 577 Options options = parse_argv(argc, argv); 578 579 if (options.remaining_count != 1) 580 usage(argv[0]); 581 582 signal(SIGINT, sigint); 583 584 Arena arena = os_alloc_arena(KB(8)); 585 Stream path = stream_alloc(&arena, KB(4)); 586 stream_append_s8(&path, c_str_to_s8(options.remaining[0])); 587 588 execute_study(arena, path, &options); 589 590 return 0; 591 }