throughput.c (13306B)
1 /* See LICENSE for license details. */ 2 /* TODO(rnp): 3 * [ ]: for finer grained evaluation of throughput latency just queue a data upload 4 * without replacing the data. 5 * [ ]: bug: we aren't inserting rf data between each frame 6 */ 7 8 #define LIB_FN function 9 #include "ogl_beamformer_lib.c" 10 11 #include <signal.h> 12 #include <stdarg.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <zstd.h> 16 17 global iv3 g_output_points = {{512, 1, 1024}}; 18 global v2 g_axial_extent = {{ 10e-3f, 165e-3f}}; 19 global v2 g_lateral_extent = {{-60e-3f, 60e-3f}}; 20 global f32 g_f_number = 0.5f; 21 22 typedef struct { 23 b32 loop; 24 b32 cuda; 25 u32 frame_number; 26 27 char **remaining; 28 i32 remaining_count; 29 } Options; 30 31 #define ZEMP_BP_MAGIC (uint64_t)0x5042504D455AFECAull 32 typedef struct { 33 u64 magic; 34 u32 version; 35 u16 decode_mode; 36 u16 beamform_mode; 37 u32 raw_data_dim[4]; 38 u32 decoded_data_dim[4]; 39 f32 xdc_element_pitch[2]; 40 f32 xdc_transform[16]; /* NOTE: column major order */ 41 i16 channel_mapping[256]; 42 f32 transmit_angles[256]; 43 f32 focal_depths[256]; 44 i16 sparse_elements[256]; 45 i16 hadamard_rows[256]; 46 f32 speed_of_sound; 47 f32 center_frequency; 48 f32 sampling_frequency; 49 f32 time_offset; 50 u32 transmit_mode; 51 } zemp_bp_v1; 52 53 global b32 g_should_exit; 54 55 #define die(...) die_((char *)__func__, __VA_ARGS__) 56 function no_return void 57 die_(char *function_name, char *format, ...) 58 { 59 if (function_name) 60 fprintf(stderr, "%s: ", function_name); 61 62 va_list ap; 63 64 va_start(ap, format); 65 vfprintf(stderr, format, ap); 66 va_end(ap); 67 68 os_exit(1); 69 } 70 71 #if OS_LINUX 72 73 #include <fcntl.h> 74 #include <sys/stat.h> 75 #include <unistd.h> 76 77 function f64 78 os_get_time(void) 79 { 80 f64 result = (f64)os_get_timer_counter() / (f64)os_get_timer_frequency(); 81 return result; 82 } 83 84 function s8 85 os_read_file_simp(char *fname) 86 { 87 s8 result; 88 i32 fd = open(fname, O_RDONLY); 89 if (fd < 0) 90 die("couldn't open file: %s\n", fname); 91 92 struct stat st; 93 if (stat(fname, &st) < 0) 94 die("couldn't stat file\n"); 95 96 result.len = st.st_size; 97 result.data = malloc((uz)st.st_size); 98 if (!result.data) 99 die("couldn't alloc space for reading\n"); 100 101 iz rlen = read(fd, result.data, (u32)st.st_size); 102 close(fd); 103 104 if (rlen != st.st_size) 105 die("couldn't read file: %s\n", fname); 106 107 return result; 108 } 109 110 #elif OS_WINDOWS 111 112 function f64 113 os_get_time(void) 114 { 115 f64 result = (f64)os_get_timer_counter() / (f64)os_w32_context.timer_frequency; 116 return result; 117 } 118 119 function s8 120 os_read_file_simp(char *fname) 121 { 122 s8 result; 123 iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); 124 if (h == INVALID_FILE) 125 die("couldn't open file: %s\n", fname); 126 127 w32_file_info fileinfo; 128 if (!GetFileInformationByHandle(h, &fileinfo)) 129 die("couldn't get file info\n", stderr); 130 131 result.len = fileinfo.nFileSizeLow; 132 result.data = malloc(fileinfo.nFileSizeLow); 133 if (!result.data) 134 die("couldn't alloc space for reading\n"); 135 136 i32 rlen = 0; 137 if (!ReadFile(h, result.data, (i32)fileinfo.nFileSizeLow, &rlen, 0) && rlen != (i32)fileinfo.nFileSizeLow) 138 die("couldn't read file: %s\n", fname); 139 CloseHandle(h); 140 141 return result; 142 } 143 144 #else 145 #error Unsupported Platform 146 #endif 147 148 function void 149 stream_ensure_termination(Stream *s, u8 byte) 150 { 151 b32 found = 0; 152 if (!s->errors && s->widx > 0) 153 found = s->data[s->widx - 1] == byte; 154 if (!found) { 155 s->errors |= s->cap - 1 < s->widx; 156 if (!s->errors) 157 s->data[s->widx++] = byte; 158 } 159 } 160 161 function void * 162 decompress_zstd_data(s8 raw) 163 { 164 uz requested_size = ZSTD_getFrameContentSize(raw.data, (uz)raw.len); 165 void *out = malloc(requested_size); 166 if (out) { 167 uz decompressed = ZSTD_decompress(out, requested_size, raw.data, (uz)raw.len); 168 if (decompressed != requested_size) { 169 free(out); 170 out = 0; 171 } 172 } 173 return out; 174 } 175 176 function zemp_bp_v1 * 177 read_zemp_bp_v1(u8 *path) 178 { 179 s8 raw = os_read_file_simp((char *)path); 180 zemp_bp_v1 *result = 0; 181 if (raw.len == sizeof(zemp_bp_v1) && *(u64 *)raw.data == ZEMP_BP_MAGIC) { 182 if (((zemp_bp_v1 *)raw.data)->version == 1) 183 result = (zemp_bp_v1 *)raw.data; 184 } 185 return result; 186 } 187 188 function void 189 beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out) 190 { 191 mem_copy(out->xdc_transform.E, zbp->xdc_transform, sizeof(out->xdc_transform)); 192 mem_copy(out->xdc_element_pitch.E, zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch)); 193 mem_copy(out->raw_data_dimensions.E, zbp->raw_data_dim, sizeof(out->raw_data_dimensions)); 194 195 out->sample_count = zbp->decoded_data_dim[0]; 196 out->channel_count = zbp->decoded_data_dim[1]; 197 out->acquisition_count = zbp->decoded_data_dim[2]; 198 out->decode_mode = (u8)zbp->decode_mode; 199 out->das_shader_id = zbp->beamform_mode; 200 out->time_offset = zbp->time_offset; 201 out->sampling_frequency = zbp->sampling_frequency; 202 out->demodulation_frequency = zbp->center_frequency; 203 out->speed_of_sound = zbp->speed_of_sound; 204 } 205 206 #define shift_n(v, c, n) v += n, c -= n 207 #define shift(v, c) shift_n(v, c, 1) 208 209 function void 210 usage(char *argv0) 211 { 212 die("%s [--loop] [--cuda] [--frame n] base_path study\n" 213 " --loop: reupload data forever\n" 214 " --cuda: use cuda for decoding\n" 215 " --frame n: use frame n of the data for display\n", 216 argv0); 217 } 218 219 function b32 220 s8_equal(s8 a, s8 b) 221 { 222 b32 result = a.len == b.len; 223 for (iz i = 0; result && i < a.len; i++) 224 result &= a.data[i] == b.data[i]; 225 return result; 226 } 227 228 function Options 229 parse_argv(i32 argc, char *argv[]) 230 { 231 Options result = {0}; 232 233 char *argv0 = argv[0]; 234 shift(argv, argc); 235 236 while (argc > 0) { 237 s8 arg = c_str_to_s8(*argv); 238 239 if (s8_equal(arg, s8("--loop"))) { 240 shift(argv, argc); 241 result.loop = 1; 242 } else if (s8_equal(arg, s8("--cuda"))) { 243 shift(argv, argc); 244 result.cuda = 1; 245 } else if (s8_equal(arg, s8("--frame"))) { 246 shift(argv, argc); 247 if (argc) { 248 result.frame_number = (u32)atoi(*argv); 249 shift(argv, argc); 250 } 251 } else if (arg.len > 0 && arg.data[0] == '-') { 252 usage(argv0); 253 } else { 254 break; 255 } 256 } 257 258 result.remaining = argv; 259 result.remaining_count = argc; 260 261 return result; 262 } 263 264 function i16 * 265 decompress_data_at_work_index(Stream *path_base, u32 index) 266 { 267 stream_append_byte(path_base, '_'); 268 stream_append_u64_width(path_base, index, 2); 269 stream_append_s8(path_base, s8(".zst")); 270 stream_ensure_termination(path_base, 0); 271 272 s8 compressed_data = os_read_file_simp((char *)path_base->data); 273 i16 *result = decompress_zstd_data(compressed_data); 274 if (!result) 275 die("failed to decompress data: %s\n", path_base->data); 276 free(compressed_data.data); 277 278 return result; 279 } 280 281 function b32 282 send_frame(i16 *restrict i16_data, BeamformerParameters *restrict bp) 283 { 284 u32 data_size = bp->raw_data_dimensions.E[0] * bp->raw_data_dimensions.E[1] * sizeof(i16); 285 b32 result = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 0); 286 if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string()); 287 288 return result; 289 } 290 291 function void 292 execute_study(s8 study, Arena arena, Stream path, Options *options) 293 { 294 fprintf(stderr, "showing: %.*s\n", (i32)study.len, study.data); 295 296 stream_append_s8(&path, study); 297 stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR); 298 stream_append_s8(&path, study); 299 i32 path_work_index = path.widx; 300 301 stream_append_s8(&path, s8(".bp")); 302 stream_ensure_termination(&path, 0); 303 304 zemp_bp_v1 *zbp = read_zemp_bp_v1(path.data); 305 if (!zbp) die("failed to unpack parameters file\n"); 306 307 BeamformerParameters bp = {0}; 308 beamformer_parameters_from_zemp_bp_v1(zbp, &bp); 309 310 bp.output_points.xyz = g_output_points; 311 bp.output_points.w = 1; 312 313 bp.output_min_coordinate.E[0] = g_lateral_extent.x; 314 bp.output_min_coordinate.E[1] = 0; 315 bp.output_min_coordinate.E[2] = g_axial_extent.x; 316 317 bp.output_max_coordinate.E[0] = g_lateral_extent.y; 318 bp.output_max_coordinate.E[1] = 0; 319 bp.output_max_coordinate.E[2] = g_axial_extent.y; 320 321 bp.f_number = g_f_number; 322 bp.beamform_plane = 0; 323 bp.interpolation_mode = BeamformerInterpolationMode_Cubic; 324 325 bp.decimation_rate = 1; 326 bp.demodulation_frequency = bp.sampling_frequency / 4; 327 328 /* NOTE(rnp): v1 files didn't specify sampling mode. it was almost always 4X */ 329 bp.sampling_mode = BeamformerSamplingMode_4X; 330 331 #if 0 332 BeamformerFilterParameters kaiser = {0}; 333 kaiser.Kaiser.beta = 5.65f; 334 kaiser.Kaiser.cutoff_frequency = 2.0e6f; 335 kaiser.Kaiser.length = 36; 336 337 beamformer_create_filter(BeamformerFilterKind_Kaiser, (f32 *)&kaiser.kaiser, 338 sizeof(kaiser.kaiser), bp.sampling_frequency / 2, 0, 0, 0); 339 beamformer_set_pipeline_stage_parameters(0, 0); 340 #endif 341 342 #if 1 343 BeamformerFilterParameters matched = {0}; 344 typeof(matched.matched_chirp) *mp = &matched.matched_chirp; 345 mp->duration = 18e-6f; 346 mp->min_frequency = 2.9e6f - bp.demodulation_frequency; 347 mp->max_frequency = 6.0e6f - bp.demodulation_frequency; 348 349 bp.time_offset += mp->duration / 2; 350 351 beamformer_create_filter(BeamformerFilterKind_MatchedChirp, (f32 *)mp, sizeof(*mp), 352 bp.sampling_frequency / 2, 1, 0, 0); 353 beamformer_set_pipeline_stage_parameters(0, 0); 354 #endif 355 356 if (zbp->sparse_elements[0] == -1) { 357 for (i16 i = 0; i < countof(zbp->sparse_elements); i++) 358 zbp->sparse_elements[i] = i; 359 } 360 361 b32 tx_rows = (zbp->transmit_mode & (1 << 1)) == 0; 362 b32 rx_rows = (zbp->transmit_mode & (1 << 0)) == 0; 363 u8 packed_tx_rx = 0; 364 if (tx_rows) packed_tx_rx |= BeamformerRCAOrientation_Rows << 4; 365 else packed_tx_rx |= BeamformerRCAOrientation_Columns << 4; 366 if (rx_rows) packed_tx_rx |= BeamformerRCAOrientation_Rows << 0; 367 else packed_tx_rx |= BeamformerRCAOrientation_Columns << 0; 368 369 if (bp.das_shader_id == BeamformerAcquisitionKind_HERCULES || 370 bp.das_shader_id == BeamformerAcquisitionKind_UHERCULES) 371 { 372 bp.single_focus = 1; 373 bp.single_orientation = 1; 374 375 bp.transmit_receive_orientation = packed_tx_rx; 376 bp.focal_vector.E[0] = zbp->transmit_angles[0]; 377 bp.focal_vector.E[1] = zbp->focal_depths[0]; 378 } else { 379 alignas(64) v2 focal_vectors[BeamformerMaxChannelCount]; 380 for (u32 i = 0; i < countof(focal_vectors); i++) 381 focal_vectors[i] = (v2){{zbp->transmit_angles[i], zbp->focal_depths[i]}}; 382 beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors)); 383 384 alignas(64) u8 transmit_receive_orientations[BeamformerMaxChannelCount]; 385 for (u32 i = 0; i < countof(transmit_receive_orientations); i++) 386 transmit_receive_orientations[i] = packed_tx_rx; 387 beamformer_push_transmit_receive_orientations(transmit_receive_orientations, 388 countof(transmit_receive_orientations)); 389 } 390 391 beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping)); 392 beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements)); 393 beamformer_push_parameters(&bp); 394 395 i32 shader_stages[16]; 396 u32 shader_stage_count = 0; 397 shader_stages[shader_stage_count++] = BeamformerShaderKind_Demodulate; 398 if (options->cuda) shader_stages[shader_stage_count++] = BeamformerShaderKind_CudaDecode; 399 else shader_stages[shader_stage_count++] = BeamformerShaderKind_Decode; 400 shader_stages[shader_stage_count++] = BeamformerShaderKind_DAS; 401 402 beamformer_push_pipeline(shader_stages, shader_stage_count, BeamformerDataKind_Int16); 403 404 beamformer_set_global_timeout(1000); 405 406 stream_reset(&path, path_work_index); 407 i16 *data = decompress_data_at_work_index(&path, options->frame_number); 408 409 if (options->loop) { 410 BeamformerLiveImagingParameters lip = {.active = 1, .save_enabled = 1}; 411 s8 short_name = s8("Throughput"); 412 mem_copy(lip.save_name_tag, short_name.data, (uz)short_name.len); 413 lip.save_name_tag_length = (i32)short_name.len; 414 beamformer_set_live_parameters(&lip); 415 416 u32 frame = 0; 417 f32 times[32] = {0}; 418 f32 data_size = (f32)(bp.raw_data_dimensions.E[0] * bp.raw_data_dimensions.E[1] * sizeof(*data)); 419 f64 start = os_get_time(); 420 for (;!g_should_exit;) { 421 if (send_frame(data, &bp)) { 422 f64 now = os_get_time(); 423 f32 delta = (f32)(now - start); 424 start = now; 425 426 if ((frame % 16) == 0) { 427 f32 sum = 0; 428 for (u32 i = 0; i < countof(times); i++) 429 sum += times[i] / countof(times); 430 printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n", 431 delta * 1e3, sum * 1e3, data_size / (sum * (GB(1)))); 432 } 433 434 times[frame % countof(times)] = delta; 435 frame++; 436 } 437 i32 flag = beamformer_live_parameters_get_dirty_flag(); 438 if (flag != -1 && (1 << flag) == BeamformerLiveImagingDirtyFlags_StopImaging) 439 break; 440 } 441 442 lip.active = 0; 443 beamformer_set_live_parameters(&lip); 444 } else { 445 for (u32 i = 0; i < zbp->raw_data_dim[2]; i++) 446 send_frame(data + i * bp.raw_data_dimensions.E[0] * bp.raw_data_dimensions.E[1], &bp); 447 } 448 449 free(zbp); 450 free(data); 451 } 452 453 function void 454 sigint(i32 _signo) 455 { 456 g_should_exit = 1; 457 } 458 459 extern i32 460 main(i32 argc, char *argv[]) 461 { 462 Options options = parse_argv(argc, argv); 463 464 if (!BETWEEN(options.remaining_count, 1, 2)) 465 usage(argv[0]); 466 467 os_common_init(); 468 469 signal(SIGINT, sigint); 470 471 Arena arena = os_alloc_arena(KB(8)); 472 Stream path = stream_alloc(&arena, KB(4)); 473 stream_append_s8(&path, c_str_to_s8(options.remaining[0])); 474 stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR); 475 476 execute_study(c_str_to_s8(options.remaining[1]), arena, path, &options); 477 478 return 0; 479 }