throughput.c (19220B)
1 /* See LICENSE for license details. */ 2 /* TODO(rnp): 3 * [ ]: for finer grained evaluation of throughput latency just queue a data upload 4 * without replacing the data. 5 * [ ]: bug: we aren't inserting rf data between each frame 6 */ 7 8 #define BEAMFORMER_LIB_EXPORT function 9 #include "ogl_beamformer_lib.c" 10 11 #include <signal.h> 12 #include <stdarg.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <zstd.h> 16 17 global iv3 g_output_points = {{512, 1, 1024}}; 18 global v2 g_axial_extent = {{ 10e-3f, 165e-3f}}; 19 global v2 g_lateral_extent = {{-60e-3f, 60e-3f}}; 20 global f32 g_f_number = 0.5f; 21 22 typedef struct { 23 b32 loop; 24 b32 cuda; 25 u32 frame_number; 26 27 char **remaining; 28 i32 remaining_count; 29 } Options; 30 31 #include "external/zemp_bp.h" 32 33 typedef struct { 34 ZBP_DataKind kind; 35 ZBP_DataCompressionKind compression_kind; 36 s8 bytes; 37 } ZBP_Data; 38 39 global b32 g_should_exit; 40 41 #define die(...) die_((char *)__func__, __VA_ARGS__) 42 function no_return void 43 die_(char *function_name, char *format, ...) 44 { 45 if (function_name) 46 fprintf(stderr, "%s: ", function_name); 47 48 va_list ap; 49 50 va_start(ap, format); 51 vfprintf(stderr, format, ap); 52 va_end(ap); 53 54 os_exit(1); 55 } 56 57 #if OS_LINUX 58 59 #include <fcntl.h> 60 #include <sys/stat.h> 61 #include <unistd.h> 62 63 function s8 64 os_read_file_simp(char *fname) 65 { 66 s8 result; 67 i32 fd = open(fname, O_RDONLY); 68 if (fd < 0) 69 die("couldn't open file: %s\n", fname); 70 71 struct stat st; 72 if (stat(fname, &st) < 0) 73 die("couldn't stat file\n"); 74 75 result.len = st.st_size; 76 result.data = malloc((uz)st.st_size); 77 if (!result.data) 78 die("couldn't alloc space for reading\n"); 79 80 iz rlen = read(fd, result.data, (u32)st.st_size); 81 close(fd); 82 83 if (rlen != st.st_size) 84 die("couldn't read file: %s\n", fname); 85 86 return result; 87 } 88 89 #elif OS_WINDOWS 90 91 function s8 92 os_read_file_simp(char *fname) 93 { 94 s8 result; 95 iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); 96 if (h == INVALID_FILE) 97 die("couldn't open file: %s\n", fname); 98 99 w32_file_info fileinfo; 100 if (!GetFileInformationByHandle(h, &fileinfo)) 101 die("couldn't get file info\n", stderr); 102 103 result.len = fileinfo.nFileSizeLow; 104 result.data = malloc(fileinfo.nFileSizeLow); 105 if (!result.data) 106 die("couldn't alloc space for reading\n"); 107 108 i32 rlen = 0; 109 if (!ReadFile(h, result.data, (i32)fileinfo.nFileSizeLow, &rlen, 0) && rlen != (i32)fileinfo.nFileSizeLow) 110 die("couldn't read file: %s\n", fname); 111 CloseHandle(h); 112 113 return result; 114 } 115 116 #else 117 #error Unsupported Platform 118 #endif 119 120 function void 121 stream_ensure_termination(Stream *s, u8 byte) 122 { 123 b32 found = 0; 124 if (!s->errors && s->widx > 0) 125 found = s->data[s->widx - 1] == byte; 126 if (!found) { 127 s->errors |= s->cap - 1 < s->widx; 128 if (!s->errors) 129 s->data[s->widx++] = byte; 130 } 131 } 132 133 function void * 134 decompress_zstd_data(s8 raw) 135 { 136 uz requested_size = ZSTD_getFrameContentSize(raw.data, (uz)raw.len); 137 void *out = malloc(requested_size); 138 if (out) { 139 uz decompressed = ZSTD_decompress(out, requested_size, raw.data, (uz)raw.len); 140 if (decompressed != requested_size) { 141 free(out); 142 out = 0; 143 } 144 } 145 return out; 146 } 147 148 function b32 149 beamformer_simple_parameters_from_zbp_file(BeamformerSimpleParameters *bp, char *path, ZBP_Data *raw_data) 150 { 151 s8 raw = os_read_file_simp(path); 152 if (raw.len < (iz)sizeof(ZBP_BaseHeader) || ((ZBP_BaseHeader *)raw.data)->magic != ZBP_HeaderMagic) 153 return 0; 154 155 switch (((ZBP_BaseHeader *)raw.data)->major) { 156 157 case 1:{ 158 ZBP_HeaderV1 *header = (ZBP_HeaderV1 *)raw.data; 159 160 bp->sample_count = header->sample_count; 161 bp->channel_count = header->channel_count; 162 bp->acquisition_count = header->receive_event_count; 163 164 bp->sampling_mode = BeamformerSamplingMode_4X; 165 bp->acquisition_kind = header->beamform_mode; 166 bp->decode_mode = header->decode_mode; 167 bp->sampling_frequency = header->sampling_frequency; 168 bp->demodulation_frequency = header->sampling_frequency / 4; 169 bp->speed_of_sound = header->speed_of_sound; 170 bp->time_offset = header->time_offset; 171 172 mem_copy(bp->channel_mapping, header->channel_mapping, sizeof(*bp->channel_mapping) * bp->channel_count); 173 mem_copy(bp->xdc_transform.E, header->transducer_transform_matrix, sizeof(bp->xdc_transform)); 174 mem_copy(bp->xdc_element_pitch.E, header->transducer_element_pitch, sizeof(bp->xdc_element_pitch)); 175 // NOTE(rnp): ignores emission count and ensemble count 176 mem_copy(bp->raw_data_dimensions.E, header->raw_data_dimension, sizeof(bp->raw_data_dimensions)); 177 178 bp->data_kind = ZBP_DataKind_Int16; 179 raw_data->kind = ZBP_DataKind_Int16; 180 raw_data->compression_kind = ZBP_DataCompressionKind_ZSTD; 181 182 read_only local_persist u8 transmit_mode_to_orientation[] = { 183 [0] = (ZBP_RCAOrientation_Rows << 4) | ZBP_RCAOrientation_Rows, 184 [1] = (ZBP_RCAOrientation_Rows << 4) | ZBP_RCAOrientation_Columns, 185 [2] = (ZBP_RCAOrientation_Columns << 4) | ZBP_RCAOrientation_Rows, 186 [3] = (ZBP_RCAOrientation_Columns << 4) | ZBP_RCAOrientation_Columns, 187 }; 188 if (header->transmit_mode >= countof(transmit_mode_to_orientation)) 189 return 0; 190 191 bp->transmit_receive_orientation = transmit_mode_to_orientation[header->transmit_mode]; 192 193 ZBP_AcquisitionKind acquisition_kind = header->beamform_mode; 194 if (acquisition_kind == ZBP_AcquisitionKind_FORCES || 195 acquisition_kind == ZBP_AcquisitionKind_HERCULES || 196 acquisition_kind == ZBP_AcquisitionKind_UFORCES || 197 acquisition_kind == ZBP_AcquisitionKind_UHERCULES) 198 { 199 bp->single_focus = 1; 200 bp->single_orientation = 1; 201 bp->focal_vector.E[0] = header->steering_angles[0]; 202 bp->focal_vector.E[1] = header->focal_depths[0]; 203 } 204 205 if (acquisition_kind == ZBP_AcquisitionKind_UFORCES || 206 acquisition_kind == ZBP_AcquisitionKind_UHERCULES) 207 { 208 mem_copy(bp->sparse_elements, header->sparse_elements, sizeof(*bp->sparse_elements) * bp->acquisition_count); 209 } 210 211 if (acquisition_kind == ZBP_AcquisitionKind_RCA_TPW || 212 acquisition_kind == ZBP_AcquisitionKind_RCA_VLS) 213 { 214 mem_copy(bp->focal_depths, header->focal_depths, sizeof(*bp->focal_depths) * bp->acquisition_count); 215 mem_copy(bp->steering_angles, header->steering_angles, sizeof(*bp->steering_angles) * bp->acquisition_count); 216 for EachIndex(bp->acquisition_count, it) 217 bp->transmit_receive_orientations[it] = bp->transmit_receive_orientation; 218 } 219 220 bp->emission_kind = BeamformerEmissionKind_Sine; 221 bp->emission_parameters.sine.cycles = 2; 222 bp->emission_parameters.sine.frequency = bp->demodulation_frequency; 223 }break; 224 225 case 2:{ 226 ZBP_HeaderV2 *header = (ZBP_HeaderV2 *)raw.data; 227 228 bp->sample_count = header->sample_count; 229 bp->channel_count = header->channel_count; 230 bp->acquisition_count = header->receive_event_count; 231 232 read_only local_persist BeamformerSamplingMode zbp_sampling_mode_to_beamformer[] = { 233 [ZBP_SamplingMode_Standard] = BeamformerSamplingMode_4X, 234 [ZBP_SamplingMode_Bandpass] = BeamformerSamplingMode_2X, 235 }; 236 bp->sampling_mode = zbp_sampling_mode_to_beamformer[header->sampling_mode]; 237 238 bp->acquisition_kind = header->acquisition_mode; 239 bp->decode_mode = header->decode_mode; 240 bp->sampling_frequency = header->sampling_frequency; 241 bp->demodulation_frequency = header->demodulation_frequency; 242 bp->speed_of_sound = header->speed_of_sound; 243 bp->time_offset = header->time_offset; 244 245 //bp->contrast_mode = header->contrast_mode; 246 247 if (header->channel_mapping_offset != -1) { 248 mem_copy(bp->channel_mapping, raw.data + header->channel_mapping_offset, 249 sizeof(*bp->channel_mapping) * bp->channel_count); 250 } else { 251 for EachIndex(bp->channel_count, it) 252 bp->channel_mapping[it] = it; 253 } 254 255 mem_copy(bp->xdc_transform.E, header->transducer_transform_matrix, sizeof(bp->xdc_transform)); 256 mem_copy(bp->xdc_element_pitch.E, header->transducer_element_pitch, sizeof(bp->xdc_element_pitch)); 257 // NOTE(rnp): ignores group count and ensemble count 258 mem_copy(bp->raw_data_dimensions.E, header->raw_data_dimension, sizeof(bp->raw_data_dimensions)); 259 260 bp->data_kind = header->raw_data_kind; 261 raw_data->kind = header->raw_data_kind; 262 raw_data->compression_kind = header->raw_data_compression_kind; 263 264 if (header->raw_data_offset != -1) { 265 raw_data->bytes.data = raw.data + header->raw_data_offset; 266 if (raw_data->compression_kind == ZBP_DataCompressionKind_ZSTD) { 267 // NOTE(rnp): limitation in the header format 268 raw_data->bytes.len = raw.len - header->raw_data_offset; 269 } else { 270 raw_data->bytes.len = header->raw_data_dimension[0] * header->raw_data_dimension[1] * 271 header->raw_data_dimension[2] * header->raw_data_dimension[3]; 272 raw_data->bytes.len *= beamformer_data_kind_byte_size[header->raw_data_kind]; 273 } 274 } 275 276 // NOTE(rnp): only look at the first emission descriptor, other cases aren't currently relevant 277 { 278 ZBP_EmissionDescriptor *ed = (ZBP_EmissionDescriptor *)(raw.data + header->emission_descriptors_offset); 279 switch (ed->emission_kind) { 280 281 case ZBP_EmissionKind_Sine:{ 282 ZBP_EmissionSineParameters *ep = (ZBP_EmissionSineParameters *)(raw.data + ed->parameters_offset); 283 bp->emission_kind = BeamformerEmissionKind_Sine; 284 bp->emission_parameters.sine.cycles = ep->cycles; 285 bp->emission_parameters.sine.frequency = ep->frequency; 286 }break; 287 288 case ZBP_EmissionKind_Chirp:{ 289 ZBP_EmissionChirpParameters *ep = (ZBP_EmissionChirpParameters *)(raw.data + ed->parameters_offset); 290 bp->emission_kind = BeamformerEmissionKind_Chirp; 291 bp->emission_parameters.chirp.duration = ep->duration; 292 bp->emission_parameters.chirp.min_frequency = ep->min_frequency; 293 bp->emission_parameters.chirp.max_frequency = ep->max_frequency; 294 }break; 295 296 InvalidDefaultCase; 297 static_assert(ZBP_EmissionKind_Count == (ZBP_EmissionKind_Chirp + 1), ""); 298 } 299 } 300 301 switch (header->acquisition_mode) { 302 case ZBP_AcquisitionKind_FORCES:{}break; 303 304 case ZBP_AcquisitionKind_HERCULES:{ 305 ZBP_HERCULESParameters *p = (ZBP_HERCULESParameters *)(raw.data + header->acquisition_parameters_offset); 306 bp->transmit_receive_orientation = p->transmit_focus.transmit_receive_orientation; 307 bp->focal_vector.E[0] = p->transmit_focus.steering_angle; 308 bp->focal_vector.E[1] = p->transmit_focus.focal_depth; 309 310 bp->single_focus = 1; 311 bp->single_orientation = 1; 312 }break; 313 314 case ZBP_AcquisitionKind_UFORCES:{ 315 ZBP_uFORCESParameters *p = (ZBP_uFORCESParameters *)(raw.data + header->acquisition_parameters_offset); 316 mem_copy(bp->sparse_elements, raw.data + p->sparse_elements_offset, 317 sizeof(*bp->sparse_elements) * bp->acquisition_count); 318 }break; 319 320 case ZBP_AcquisitionKind_UHERCULES:{ 321 ZBP_uHERCULESParameters *p = (ZBP_uHERCULESParameters *)(raw.data + header->acquisition_parameters_offset); 322 bp->transmit_receive_orientation = p->transmit_focus.transmit_receive_orientation; 323 bp->focal_vector.E[0] = p->transmit_focus.steering_angle; 324 bp->focal_vector.E[1] = p->transmit_focus.focal_depth; 325 326 bp->single_focus = 1; 327 bp->single_orientation = 1; 328 329 mem_copy(bp->sparse_elements, raw.data + p->sparse_elements_offset, 330 sizeof(*bp->sparse_elements) * bp->acquisition_count); 331 }break; 332 333 case ZBP_AcquisitionKind_RCA_TPW:{ 334 ZBP_TPWParameters *p = (ZBP_TPWParameters *)(raw.data + header->acquisition_parameters_offset); 335 336 mem_copy(bp->transmit_receive_orientations, raw.data + p->transmit_receive_orientations_offset, 337 sizeof(*bp->transmit_receive_orientations) * bp->acquisition_count); 338 mem_copy(bp->steering_angles, raw.data + p->tilting_angles_offset, 339 sizeof(*bp->steering_angles) * bp->acquisition_count); 340 341 for EachIndex(bp->acquisition_count, it) 342 bp->focal_depths[it] = F32_INFINITY; 343 }break; 344 345 case ZBP_AcquisitionKind_RCA_VLS:{ 346 ZBP_VLSParameters *p = (ZBP_VLSParameters *)(raw.data + header->acquisition_parameters_offset); 347 348 mem_copy(bp->transmit_receive_orientations, raw.data + p->transmit_receive_orientations_offset, 349 sizeof(*bp->transmit_receive_orientations) * bp->acquisition_count); 350 351 f32 *focal_depths = (f32 *)(raw.data + p->focal_depths_offset); 352 f32 *origin_offsets = (f32 *)(raw.data + p->origin_offsets_offset); 353 354 for EachIndex(bp->acquisition_count, it) { 355 f32 sign = Sign(focal_depths[it]); 356 f32 depth = focal_depths[it]; 357 f32 origin = origin_offsets[it]; 358 bp->steering_angles[it] = atan2_f32(origin, -depth) * 180.0f / PI; 359 bp->focal_depths[it] = sign * sqrt_f32(depth * depth + origin * origin); 360 } 361 }break; 362 363 InvalidDefaultCase; 364 } 365 366 }break; 367 368 default:{return 0;}break; 369 } 370 371 return 1; 372 } 373 374 #define shift_n(v, c, n) v += n, c -= n 375 #define shift(v, c) shift_n(v, c, 1) 376 377 function void 378 usage(char *argv0) 379 { 380 die("%s [--loop] [--cuda] [--frame n] parameters_file\n" 381 " --loop: reupload data forever\n" 382 " --cuda: use cuda for decoding\n" 383 " --frame n: use frame n of the data for display\n", 384 argv0); 385 } 386 387 function Options 388 parse_argv(i32 argc, char *argv[]) 389 { 390 Options result = {0}; 391 392 char *argv0 = argv[0]; 393 shift(argv, argc); 394 395 while (argc > 0) { 396 s8 arg = c_str_to_s8(*argv); 397 398 if (s8_equal(arg, s8("--loop"))) { 399 shift(argv, argc); 400 result.loop = 1; 401 } else if (s8_equal(arg, s8("--cuda"))) { 402 shift(argv, argc); 403 result.cuda = 1; 404 } else if (s8_equal(arg, s8("--frame"))) { 405 shift(argv, argc); 406 if (argc) { 407 result.frame_number = (u32)atoi(*argv); 408 shift(argv, argc); 409 } 410 } else if (arg.len > 0 && arg.data[0] == '-') { 411 usage(argv0); 412 } else { 413 break; 414 } 415 } 416 417 result.remaining = argv; 418 result.remaining_count = argc; 419 420 return result; 421 } 422 423 function b32 424 send_frame(void *restrict data, BeamformerSimpleParameters *restrict bp) 425 { 426 u32 data_size = bp->raw_data_dimensions.E[0] * bp->raw_data_dimensions.E[1] 427 * beamformer_data_kind_byte_size[bp->data_kind]; 428 b32 result = beamformer_push_data_with_compute(data, data_size, BeamformerViewPlaneTag_XZ, 0); 429 if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string()); 430 431 return result; 432 } 433 434 function void 435 execute_study(Arena arena, Stream path, Options *options) 436 { 437 i32 path_work_index = path.widx; 438 stream_ensure_termination(&path, 0); 439 440 ZBP_Data raw_data = {0}; 441 BeamformerSimpleParameters bp = {0}; 442 if (!beamformer_simple_parameters_from_zbp_file(&bp, (char *)path.data, &raw_data)) 443 die("failed to load parameters file: %s\n", (char *)path.data); 444 445 v3 min_coordinate = (v3){{g_lateral_extent.x, g_axial_extent.x, 0}}; 446 v3 max_coordinate = (v3){{g_lateral_extent.y, g_axial_extent.y, 0}}; 447 bp.das_voxel_transform = das_transform(min_coordinate, max_coordinate, &g_output_points); 448 449 bp.output_points.xyz = g_output_points; 450 bp.output_points.w = 1; 451 452 bp.f_number = g_f_number; 453 bp.interpolation_mode = BeamformerInterpolationMode_Cubic; 454 455 bp.decimation_rate = 1; 456 457 if (bp.data_kind != BeamformerDataKind_Float32Complex && 458 bp.data_kind != BeamformerDataKind_Int16Complex) 459 { 460 bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Demodulate; 461 } 462 if (options->cuda) bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_CudaDecode; 463 else bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Decode; 464 bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_DAS; 465 466 { 467 BeamformerFilterParameters filter = {0}; 468 BeamformerFilterKind filter_kind = 0; 469 b32 complex = 0; 470 u32 size = 0; 471 472 BeamformerEmissionParameters *ep = &bp.emission_parameters; 473 switch (bp.emission_kind) { 474 475 case BeamformerEmissionKind_Sine:{ 476 filter_kind = BeamformerFilterKind_Kaiser; 477 filter.kaiser.beta = 5.65f; 478 filter.kaiser.cutoff_frequency = 0.5f * ep->sine.frequency; 479 filter.kaiser.length = 36; 480 size = sizeof(filter.kaiser); 481 }break; 482 483 case BeamformerEmissionKind_Chirp:{ 484 filter_kind = BeamformerFilterKind_MatchedChirp; 485 486 filter.matched_chirp.duration = ep->chirp.duration; 487 filter.matched_chirp.min_frequency = ep->chirp.min_frequency - bp.demodulation_frequency; 488 filter.matched_chirp.max_frequency = ep->chirp.max_frequency - bp.demodulation_frequency; 489 size = sizeof(filter.matched_chirp); 490 complex = 1; 491 492 //bp.time_offset += ep->chirp.duration / 2; 493 }break; 494 495 InvalidDefaultCase; 496 } 497 498 beamformer_create_filter(filter_kind, (f32 *)&filter.kaiser, size, bp.sampling_frequency / 2, 499 complex, 0, 0); 500 501 bp.compute_stage_parameters[0] = 0; 502 } 503 504 beamformer_push_simple_parameters(&bp); 505 506 beamformer_set_global_timeout(1000); 507 508 void *data = 0; 509 if (raw_data.bytes.len == 0) { 510 // NOTE(rnp): strip ".bp" 511 stream_reset(&path, path_work_index - 3); 512 513 stream_append_byte(&path, '_'); 514 stream_append_u64_width(&path, options->frame_number, 2); 515 stream_append_s8(&path, s8(".zst")); 516 stream_ensure_termination(&path, 0); 517 s8 compressed_data = os_read_file_simp((char *)path.data); 518 519 data = decompress_zstd_data(compressed_data); 520 if (!data) 521 die("failed to decompress data: %s\n", path.data); 522 free(compressed_data.data); 523 } else { 524 if (raw_data.compression_kind == ZBP_DataCompressionKind_ZSTD) { 525 data = decompress_zstd_data(raw_data.bytes); 526 if (!data) 527 die("failed to decompress data: %s\n", path.data); 528 } else { 529 data = raw_data.bytes.data; 530 } 531 } 532 533 if (options->loop) { 534 BeamformerLiveImagingParameters lip = {.active = 1, .save_enabled = 1}; 535 s8 short_name = s8("Throughput"); 536 mem_copy(lip.save_name_tag, short_name.data, (uz)short_name.len); 537 lip.save_name_tag_length = (i32)short_name.len; 538 beamformer_set_live_parameters(&lip); 539 540 u32 frame = 0; 541 f32 times[32] = {0}; 542 f32 data_size = (f32)(bp.raw_data_dimensions.E[0] * bp.raw_data_dimensions.E[1] 543 * beamformer_data_kind_byte_size[bp.data_kind]); 544 u64 start = os_timer_count(); 545 f64 frequency = os_timer_frequency(); 546 for (;!g_should_exit;) { 547 if (send_frame(data, &bp)) { 548 u64 now = os_timer_count(); 549 f64 delta = (now - start) / frequency; 550 start = now; 551 552 if ((frame % 16) == 0) { 553 f32 sum = 0; 554 for (u32 i = 0; i < countof(times); i++) 555 sum += times[i] / countof(times); 556 printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n", 557 delta * 1e3, sum * 1e3, data_size / (sum * (GB(1)))); 558 } 559 560 times[frame % countof(times)] = delta; 561 frame++; 562 } 563 i32 flag = beamformer_live_parameters_get_dirty_flag(); 564 if (flag != -1 && (1 << flag) == BeamformerLiveImagingDirtyFlags_StopImaging) 565 break; 566 } 567 568 lip.active = 0; 569 beamformer_set_live_parameters(&lip); 570 } else { 571 send_frame(data, &bp); 572 } 573 } 574 575 function void 576 sigint(i32 _signo) 577 { 578 g_should_exit = 1; 579 } 580 581 extern i32 582 main(i32 argc, char *argv[]) 583 { 584 Options options = parse_argv(argc, argv); 585 586 if (options.remaining_count != 1) 587 usage(argv[0]); 588 589 signal(SIGINT, sigint); 590 591 Arena arena = os_alloc_arena(KB(8)); 592 Stream path = stream_alloc(&arena, KB(4)); 593 stream_append_s8(&path, c_str_to_s8(options.remaining[0])); 594 595 execute_study(arena, path, &options); 596 597 return 0; 598 }